1 /*
2 * speedtableHash.c --
3 *
4 * Ctables implementation of in-memory hash tables, adapted from Tcl
5 *
6 * Copyright (c) 1991-1993 The Regents of the University of California.
7 * Copyright (c) 1994 Sun Microsystems, Inc.
8 *
9 * See the file "license.terms" for information on usage and redistribution of
10 * this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 *
12 * RCS: @(#) $Id$
13 */
14
15 //#include "tclInt.h"
16 #include <tcl.h>
17
18 /*
19 * When there are this many entries per bucket, on average, rebuild the hash
20 * table to make it larger.
21 */
22
23 #define REBUILD_MULTIPLIER 3
24
25 /*
26 * The following macro takes a preliminary integer hash value and produces an
27 * index into a hash tables bucket list. The idea is to make it so that
28 * preliminary values that are arbitrarily similar will end up in different
29 * buckets. The hash function was taken from a random-number generator.
30 */
31
32 #define RANDOM_INDEX(tablePtr, i) \
33 (((((long) (i))*1103515245) >> (tablePtr)->downShift) & (tablePtr)->mask)
34
35 /*
36 * Prototypes for the string hash key methods.
37 */
38
39 static int CompareStringKeys(ctable_HashTable *tablePtr, VOID *keyPtr, ctable_HashEntry *hPtr);
40 static unsigned int HashStringKey(ctable_HashTable *tablePtr, VOID *keyPtr);
41
42 /*
43 * Function prototypes for static functions in this file:
44 */
45
46 static void RebuildTable(ctable_HashTable *tablePtr);
47
48
49 /*
50 *----------------------------------------------------------------------
51 *
52 * ctable_InitHashTable --
53 *
54 * Given storage for a hash table, set up the fields to prepare the hash
55 * table for use.
56 *
57 * Results:
58 * None.
59 *
60 * Side effects:
61 * TablePtr is now ready to be passed to ctable_FindHashEntry and
62 * ctable_CreateHashEntry.
63 *
64 *----------------------------------------------------------------------
65 */
66
67 void
ctable_InitHashTable(ctable_HashTable * tablePtr)68 ctable_InitHashTable(ctable_HashTable *tablePtr)
69 /* Pointer to table record, which is supplied
70 * by the caller. */
71 {
72 int i;
73
74 #if (CTABLE_SMALL_HASH_TABLE != 16)
75 Tcl_Panic("ctable_InitCustomHashTable: CTABLE_SMALL_HASH_TABLE is %d, not 16",
76 CTABLE_SMALL_HASH_TABLE);
77 #endif
78
79 tablePtr->buckets = tablePtr->staticBuckets;
80
81 for (i = 0; i < CTABLE_SMALL_HASH_TABLE; i++) {
82 tablePtr->staticBuckets[i] = 0;
83 }
84
85 tablePtr->numBuckets = CTABLE_SMALL_HASH_TABLE;
86 tablePtr->numEntries = 0;
87 tablePtr->rebuildSize = CTABLE_SMALL_HASH_TABLE*REBUILD_MULTIPLIER;
88 tablePtr->downShift = 26;
89 tablePtr->mask = 15;
90 }
91
92 /*
93 * defaultHashKeyProc --
94 *
95 * Given a pointer of a type with an unknown hash key proc, generate
96 * a hash for it.
97 */
98 #if defined(__amd64__) || defined(__ia64__)
99 # define defaultHashKeyProc(p) \
100 ((((quad_t)(p)) & 0xFFFFFFFF) ^ (((quad_t)(p)) >> 32))
101 #else
102 # define defaultHashKeyProc(p) ((unsigned int)(p))
103 #endif
104
105
106 /*
107 *----------------------------------------------------------------------
108 *
109 * ctable_InitOrStoreHashEntry --
110 *
111 * Given a hash table with string keys, and a string key, find the entry
112 * with a matching key. If there is no matching entry, then link up
113 * the hashEntry provided, or a new one, into the table.
114 *
115 * Results:
116 * The return value is a pointer to the matching entry. If the entry
117 * is getting inserted, then *newPtr will be set to a non-zero value;
118 * otherwise *newPtr will be set to 0. If this is a new entry the value
119 * stored in the entry will initially be 0.
120 * if NewPtr is NULL, then
121 *
122 * Side effects:
123 * A new entry may be added to the hash table.
124 *
125 *----------------------------------------------------------------------
126 */
127
128 static
129 inline
130 ctable_HashEntry *
ctable_InitOrStoreHashEntry(ctable_HashTable * tablePtr,CONST char * key,ctable_HashEntry * newEntry,int flags,int * newPtr)131 ctable_InitOrStoreHashEntry(
132 ctable_HashTable *tablePtr, /* Table in which to lookup entry. */
133 CONST char *key, /* Key to use to find or create matching
134 * entry. */
135 ctable_HashEntry *newEntry, /* if not null, use this entry */
136 int flags, /* options */
137 int *newPtr) /* Store info here telling whether a new entry
138 * was created. */
139 {
140 ctable_HashEntry *hPtr;
141 unsigned int hash;
142 int index;
143
144 hash = HashStringKey (tablePtr, (VOID *) key);
145 index = RANDOM_INDEX (tablePtr, hash);
146
147 /*
148 * Search all of the entries in the appropriate bucket.
149 */
150
151 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
152 hPtr = hPtr->nextPtr) {
153
154 if (hash != (unsigned int)(hPtr->hash)) {
155 continue;
156 }
157
158 if (!CompareStringKeys(tablePtr, (VOID *) key, hPtr)) {
159 if (newPtr)
160 *newPtr = 0;
161 return hPtr;
162 }
163 }
164
165 if (!newPtr)
166 return NULL;
167
168 if (newEntry)
169 hPtr = newEntry;
170 else
171 return NULL; /* Can't allocate */
172
173 /*
174 * Entry not found. Add a new one to the bucket.
175 */
176
177 *newPtr = 1;
178
179 /* Allocate a new key if needed */
180 if(newEntry && (flags & KEY_MASK) == KEY_VOLATILE) {
181 hPtr->key = (char *) ckalloc (strlen (key) + 1);
182 strcpy (hPtr->key, key);
183 } else {
184 hPtr->key = (char *)key;
185 }
186
187 hPtr->hash = hash;
188 hPtr->nextPtr = tablePtr->buckets[index];
189 tablePtr->buckets[index] = hPtr;
190 tablePtr->numEntries++;
191
192 /*
193 * If the table has exceeded a decent size, rebuild it with many more
194 * buckets.
195 */
196
197 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
198 RebuildTable(tablePtr);
199 }
200 return hPtr;
201 }
202
203 /*
204 *----------------------------------------------------------------------
205 *
206 * ctable_InitHashEntry --
207 *
208 * Given a hash table with string keys, and a string key, find the entry
209 * with a matching key. If there is no matching entry, then link up
210 * a new hashEntry into the table.
211 *
212 * Results:
213 * The return value is a pointer to the matching entry. If the entry
214 * is getting inserted, then *newPtr will be set to a non-zero value;
215 * otherwise *newPtr will be set to 0. If this is a new entry the value
216 * stored in the entry will initially be 0.
217 *
218 * Side effects:
219 * A new entry may be added to the hash table.
220 *
221 *----------------------------------------------------------------------
222 */
223
224 static
225 inline
226 ctable_HashEntry *
ctable_InitHashEntry(ctable_HashTable * tablePtr,CONST char * key,int * newPtr)227 ctable_InitHashEntry(
228 ctable_HashTable *tablePtr, /* Table in which to lookup entry. */
229 CONST char *key, /* Key to use to find or create matching
230 * entry. */
231 int *newPtr) /* Store info here telling whether a new entry
232 * was created. */
233 {
234 return
235 ctable_InitOrStoreHashEntry(tablePtr, key, NULL, 0, newPtr);
236 }
237
238 /*
239 *----------------------------------------------------------------------
240 *
241 * ctable_StoreHashEntry --
242 *
243 * Given a hash table with string keys, and a string key, find the entry
244 * with a matching key. If there is no matching entry, then store the
245 * provided entry into the table.
246 *
247 * Results:
248 * The return value is a pointer to the matching entry. If the entry
249 * is getting inserted, then *newPtr will be set to a non-zero value;
250 * otherwise *newPtr will be set to 0. If this is a new entry the value
251 * stored in the entry will initially be 0.
252 *
253 * Side effects:
254 * A new entry may be added to the hash table.
255 *
256 *----------------------------------------------------------------------
257 */
258
259 static
260 inline
261 ctable_HashEntry *
ctable_StoreHashEntry(ctable_HashTable * tablePtr,CONST char * key,ctable_HashEntry * newEntry,int flags,int * newPtr)262 ctable_StoreHashEntry(
263 ctable_HashTable *tablePtr, /* Table in which to lookup entry. */
264 CONST char *key, /* Key to use to find or create matching
265 * entry. */
266 ctable_HashEntry *newEntry, /* if not null, use this entry */
267 int flags, /* options */
268 int *newPtr) /* Store info here telling whether a new entry
269 * was created. */
270 {
271 return
272 ctable_InitOrStoreHashEntry(tablePtr, key, newEntry, flags, newPtr);
273 }
274
275
276 /*
277 *----------------------------------------------------------------------
278 *
279 * ctable_FindHashEntry --
280 *
281 * Given a hash table find the entry with a matching key.
282 *
283 * Results:
284 * The return value is a token for the matching entry in the hash table,
285 * or NULL if there was no matching entry.
286 *
287 * Side effects:
288 * None.
289 *
290 *----------------------------------------------------------------------
291 */
292 inline
293 ctable_HashEntry *
ctable_FindHashEntry(ctable_HashTable * tablePtr,CONST char * key)294 ctable_FindHashEntry(
295 ctable_HashTable *tablePtr, /* Table in which to lookup entry. */
296 CONST char *key) /* Key to use to find matching entry. */
297 {
298 return ctable_InitHashEntry (tablePtr, key, NULL);
299 }
300
301 /*
302 *----------------------------------------------------------------------
303 *
304 * ctable_DeleteHashEntry --
305 *
306 * Remove a single entry from a hash table.
307 *
308 * Results:
309 * None.
310 *
311 * Side effects:
312 * The entry given by entryPtr is deleted from its table and should never
313 * again be used by the caller. It is up to the caller to free the
314 * clientData field of the entry, if that is relevant.
315 *
316 *----------------------------------------------------------------------
317 */
318 void
ctable_DeleteHashEntry(ctable_HashTable * tablePtr,ctable_HashEntry * entryPtr,char * nullKeyValue)319 ctable_DeleteHashEntry(ctable_HashTable *tablePtr, ctable_HashEntry *entryPtr, char *nullKeyValue)
320 {
321 ctable_HashEntry *prevPtr;
322 ctable_HashEntry **bucketPtr;
323 int index;
324
325 index = RANDOM_INDEX (tablePtr, entryPtr->hash);
326
327 bucketPtr = &(tablePtr->buckets[index]);
328
329 if (*bucketPtr == entryPtr) {
330 *bucketPtr = entryPtr->nextPtr;
331 } else {
332 for (prevPtr = *bucketPtr; ; prevPtr = prevPtr->nextPtr) {
333 if (prevPtr == NULL) {
334 Tcl_Panic("malformed bucket chain in ctable_DeleteHashEntry");
335 }
336 if (prevPtr->nextPtr == entryPtr) {
337 prevPtr->nextPtr = entryPtr->nextPtr;
338 break;
339 }
340 }
341 }
342
343 tablePtr->numEntries--;
344
345 if(entryPtr->key != nullKeyValue)
346 ckfree (entryPtr->key);
347 }
348
349
350 /*
351 *----------------------------------------------------------------------
352 *
353 * ctable_DeleteHashTable --
354 *
355 * Free up everything associated with a hash table except for the record
356 * for the table itself.
357 *
358 * Results:
359 * None.
360 *
361 * Side effects:
362 * The hash table is no longer useable.
363 *
364 *----------------------------------------------------------------------
365 */
366
367 void
ctable_DeleteHashTable(ctable_HashTable * tablePtr)368 ctable_DeleteHashTable(ctable_HashTable *tablePtr) /* Table to delete. */
369 {
370 /*
371 * Free up the bucket array, if it was dynamically allocated.
372 */
373
374 if (tablePtr->buckets != tablePtr->staticBuckets) {
375 ckfree((char*)tablePtr->buckets);
376 }
377 }
378
379 /*
380 *----------------------------------------------------------------------
381 *
382 * ctable_FirstHashEntry --
383 *
384 * Locate the first entry in a hash table and set up a record that can be
385 * used to step through all the remaining entries of the table.
386 *
387 * Results:
388 * The return value is a pointer to the first entry in tablePtr, or NULL
389 * if tablePtr has no entries in it. The memory at *searchPtr is
390 * initialized so that subsequent calls to ctable_NextHashEntry will return
391 * all of the entries in the table, one at a time.
392 *
393 * Side effects:
394 * None.
395 *
396 *----------------------------------------------------------------------
397 */
398
399 ctable_HashEntry *
ctable_FirstHashEntry(ctable_HashTable * tablePtr,ctable_HashSearch * searchPtr)400 ctable_FirstHashEntry(
401 ctable_HashTable *tablePtr, /* Table to search. */
402 ctable_HashSearch *searchPtr) /* Place to store information about progress
403 * through the table. */
404 {
405 searchPtr->tablePtr = tablePtr;
406 searchPtr->nextIndex = 0;
407 searchPtr->nextEntryPtr = NULL;
408 return ctable_NextHashEntry(searchPtr);
409 }
410
411 /*
412 *----------------------------------------------------------------------
413 *
414 * ctable_NextHashEntry --
415 *
416 * Once a hash table enumeration has been initiated by calling
417 * ctable_FirstHashEntry, this function may be called to return successive
418 * elements of the table.
419 *
420 * Results:
421 * The return value is the next entry in the hash table being enumerated,
422 * or NULL if the end of the table is reached.
423 *
424 * Side effects:
425 * None.
426 *
427 *----------------------------------------------------------------------
428 */
429
430 ctable_HashEntry *
ctable_NextHashEntry(ctable_HashSearch * searchPtr)431 ctable_NextHashEntry(ctable_HashSearch *searchPtr)
432 /* Place to store information about progress
433 * through the table. Must have been
434 * initialized by calling
435 * ctable_FirstHashEntry. */
436 {
437 ctable_HashEntry *hPtr;
438 ctable_HashTable *tablePtr = searchPtr->tablePtr;
439
440 while (searchPtr->nextEntryPtr == NULL) {
441 if (searchPtr->nextIndex >= tablePtr->numBuckets) {
442 return NULL;
443 }
444 searchPtr->nextEntryPtr =
445 tablePtr->buckets[searchPtr->nextIndex];
446 searchPtr->nextIndex++;
447 }
448 hPtr = searchPtr->nextEntryPtr;
449 searchPtr->nextEntryPtr = hPtr->nextPtr;
450 return hPtr;
451 }
452
453 /*
454 *----------------------------------------------------------------------
455 *
456 * ctable_HashStats --
457 *
458 * Return statistics describing the layout of the hash table in its hash
459 * buckets.
460 *
461 * Results:
462 * The return value is a malloc-ed string containing information about
463 * tablePtr. It is the caller's responsibility to free this string.
464 *
465 * Side effects:
466 * None.
467 *
468 *----------------------------------------------------------------------
469 */
470
471 CONST char *
ctable_HashStats(ctable_HashTable * tablePtr)472 ctable_HashStats(
473 ctable_HashTable *tablePtr) /* Table for which to produce stats. */
474 {
475 #define NUM_COUNTERS 10
476 int count[NUM_COUNTERS], overflow, i, j;
477 double average, tmp;
478 ctable_HashEntry *hPtr;
479 char *result, *p;
480
481 /*
482 * Compute a histogram of bucket usage.
483 */
484
485 for (i = 0; i < NUM_COUNTERS; i++) {
486 count[i] = 0;
487 }
488 overflow = 0;
489 average = 0.0;
490 for (i = 0; i < tablePtr->numBuckets; i++) {
491 j = 0;
492 for (hPtr = tablePtr->buckets[i]; hPtr != NULL; hPtr = hPtr->nextPtr) {
493 j++;
494 }
495 if (j < NUM_COUNTERS) {
496 count[j]++;
497 } else {
498 overflow++;
499 }
500 tmp = j;
501 if (tablePtr->numEntries != 0) {
502 average += (tmp+1.0)*(tmp/tablePtr->numEntries)/2.0;
503 }
504 }
505
506 /*
507 * Print out the histogram and a few other pieces of information.
508 */
509
510 result = (char *) ckalloc((unsigned) (NUM_COUNTERS*60) + 300);
511 sprintf(result, "%d entries in table, %d buckets\n",
512 tablePtr->numEntries, tablePtr->numBuckets);
513 p = result + strlen(result);
514 for (i = 0; i < NUM_COUNTERS; i++) {
515 sprintf(p, "number of buckets with %d entries: %d\n",
516 i, count[i]);
517 p += strlen(p);
518 }
519 sprintf(p, "number of buckets with %d or more entries: %d\n",
520 NUM_COUNTERS, overflow);
521 p += strlen(p);
522 sprintf(p, "average search distance for entry: %.1f", average);
523 return result;
524 }
525
526 /*
527 *----------------------------------------------------------------------
528 *
529 * CompareStringKeys --
530 *
531 * Compares two string keys.
532 *
533 * Results:
534 * The return value is 0 if they are the same, and -1 if the new key
535 * is less than the existing key and 1 if the new key is greater
536 * than the existing key.
537 *
538 * Side effects:
539 * None.
540 *
541 *----------------------------------------------------------------------
542 */
543
544 static int
CompareStringKeys(ctable_HashTable * tablePtr,VOID * keyPtr,ctable_HashEntry * hPtr)545 CompareStringKeys(
546 ctable_HashTable *tablePtr,
547 VOID *keyPtr, /* New key to compare. */
548 ctable_HashEntry *hPtr) /* Existing key to compare. */
549 {
550 CONST char *p1 = (CONST char *) keyPtr;
551 CONST char *p2 = (CONST char *) hPtr->key;
552
553 #ifdef CTABLE_COMPARE_HASHES_WITH_STRCMP
554 return strcmp(p1, p2);
555 #else
556 for (;; p1++, p2++) {
557 if (*p1 != *p2) {
558 if (*p1 < *p2) {
559 return -1;
560 } else {
561 return 1;
562 }
563 break;
564 }
565 if (*p1 == '\0') {
566 return 0;
567 }
568 }
569 Tcl_Panic ("code failure in CompareStringKeys - should not be here");
570 return 0;
571 #endif /* CTABLE_COMPARE_HASHES_WITH_STRCMP */
572 }
573
574 /*
575 *----------------------------------------------------------------------
576 *
577 * HashStringKey --
578 *
579 * Compute a one-word summary of a text string, which can be used to
580 * generate a hash index.
581 *
582 * Results:
583 * The return value is a one-word summary of the information in string.
584 *
585 * Side effects:
586 * None.
587 *
588 *----------------------------------------------------------------------
589 */
590
591 static unsigned int
HashStringKey(ctable_HashTable * tablePtr,VOID * keyPtr)592 HashStringKey(
593 ctable_HashTable *tablePtr, /* Hash table. */
594 VOID *keyPtr) /* Key from which to compute hash value. */
595 {
596 CONST char *string = (CONST char *) keyPtr;
597 unsigned int result;
598 int c;
599
600 /*
601 * I tried a zillion different hash functions and asked many other people
602 * for advice. Many people had their own favorite functions, all
603 * different, but no-one had much idea why they were good ones. I chose
604 * the one below (multiply by 9 and add new character) because of the
605 * following reasons:
606 *
607 * 1. Multiplying by 10 is perfect for keys that are decimal strings, and
608 * multiplying by 9 is just about as good.
609 * 2. Times-9 is (shift-left-3) plus (old). This means that each
610 * character's bits hang around in the low-order bits of the hash value
611 * for ever, plus they spread fairly rapidly up to the high-order bits
612 * to fill out the hash value. This seems works well both for decimal
613 * and non-decimal strings, but isn't strong against maliciously-chosen
614 * keys.
615 */
616
617 result = 0;
618
619 for (c=*string++ ; c ; c=*string++) {
620 result += (result<<3) + c;
621 }
622 return result;
623 }
624
625 /*
626 *----------------------------------------------------------------------
627 *
628 * RebuildTable --
629 *
630 * This function is invoked when the ratio of entries to hash buckets
631 * becomes too large. It creates a new table with a larger bucket array
632 * and moves all of the entries into the new table.
633 *
634 * Results:
635 * None.
636 *
637 * Side effects:
638 * Memory gets reallocated and entries get re-hashed to new buckets.
639 *
640 *----------------------------------------------------------------------
641 */
642
643 static void
RebuildTable(ctable_HashTable * tablePtr)644 RebuildTable(ctable_HashTable *tablePtr) /* Table to enlarge. */
645 {
646 int oldSize, count, index;
647 ctable_HashEntry **oldBuckets;
648 ctable_HashEntry **oldChainPtr, **newChainPtr;
649 ctable_HashEntry *hPtr;
650
651 oldSize = tablePtr->numBuckets;
652 oldBuckets = tablePtr->buckets;
653
654 /*
655 * Allocate and initialize the new bucket array, and set up hashing
656 * constants for new array size.
657 */
658
659 tablePtr->numBuckets *= 16;
660 tablePtr->buckets = (ctable_HashEntry **) ckalloc((unsigned)
661 (tablePtr->numBuckets * sizeof(ctable_HashEntry *)));
662 for (count = tablePtr->numBuckets, newChainPtr = tablePtr->buckets;
663 count > 0; count--, newChainPtr++) {
664 *newChainPtr = NULL;
665 }
666 tablePtr->rebuildSize *= 16;
667 tablePtr->downShift -= 4;
668 tablePtr->mask = (tablePtr->mask << 4) + 15;
669
670 // printf("rebuilding table from %d buckets to %d buckets\n", oldSize, tablePtr->numBuckets);
671
672 /*
673 * Rehash all of the existing entries into the new bucket array.
674 */
675
676 for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) {
677 for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) {
678 *oldChainPtr = hPtr->nextPtr;
679 index = RANDOM_INDEX (tablePtr, hPtr->hash);
680 hPtr->nextPtr = tablePtr->buckets[index];
681 tablePtr->buckets[index] = hPtr;
682 }
683 }
684
685 /*
686 * Free up the old bucket array, if it was dynamically allocated.
687 */
688
689 if (oldBuckets != tablePtr->staticBuckets) {
690 ckfree((char*)oldBuckets);
691 }
692
693 // printf("done\n");
694 }
695
696
697 /*
698 * Local Variables:
699 * mode: c
700 * c-basic-offset: 4
701 * fill-column: 78
702 * End:
703 */
704