1 /*
2  * speedtableHash.c --
3  *
4  *	Ctables implementation of in-memory hash tables, adapted from Tcl
5  *
6  * Copyright (c) 1991-1993 The Regents of the University of California.
7  * Copyright (c) 1994 Sun Microsystems, Inc.
8  *
9  * See the file "license.terms" for information on usage and redistribution of
10  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
11  *
12  * RCS: @(#) $Id$
13  */
14 
15 //#include "tclInt.h"
16 #include <tcl.h>
17 
18 /*
19  * When there are this many entries per bucket, on average, rebuild the hash
20  * table to make it larger.
21  */
22 
23 #define REBUILD_MULTIPLIER	3
24 
25 /*
26  * The following macro takes a preliminary integer hash value and produces an
27  * index into a hash tables bucket list. The idea is to make it so that
28  * preliminary values that are arbitrarily similar will end up in different
29  * buckets. The hash function was taken from a random-number generator.
30  */
31 
32 #define RANDOM_INDEX(tablePtr, i) \
33     (((((long) (i))*1103515245) >> (tablePtr)->downShift) & (tablePtr)->mask)
34 
35 /*
36  * Prototypes for the string hash key methods.
37  */
38 
39 static int		CompareStringKeys(ctable_HashTable *tablePtr, VOID *keyPtr, ctable_HashEntry *hPtr);
40 static unsigned int	HashStringKey(ctable_HashTable *tablePtr, VOID *keyPtr);
41 
42 /*
43  * Function prototypes for static functions in this file:
44  */
45 
46 static void		RebuildTable(ctable_HashTable *tablePtr);
47 
48 
49 /*
50  *----------------------------------------------------------------------
51  *
52  * ctable_InitHashTable --
53  *
54  *	Given storage for a hash table, set up the fields to prepare the hash
55  *	table for use.
56  *
57  * Results:
58  *	None.
59  *
60  * Side effects:
61  *	TablePtr is now ready to be passed to ctable_FindHashEntry and
62  *	ctable_CreateHashEntry.
63  *
64  *----------------------------------------------------------------------
65  */
66 
67 void
ctable_InitHashTable(ctable_HashTable * tablePtr)68 ctable_InitHashTable(ctable_HashTable *tablePtr)
69 				/* Pointer to table record, which is supplied
70 				 * by the caller. */
71 {
72     int i;
73 
74 #if (CTABLE_SMALL_HASH_TABLE != 16)
75     Tcl_Panic("ctable_InitCustomHashTable: CTABLE_SMALL_HASH_TABLE is %d, not 16",
76 	    CTABLE_SMALL_HASH_TABLE);
77 #endif
78 
79     tablePtr->buckets = tablePtr->staticBuckets;
80 
81     for (i = 0; i < CTABLE_SMALL_HASH_TABLE; i++) {
82         tablePtr->staticBuckets[i] = 0;
83     }
84 
85     tablePtr->numBuckets = CTABLE_SMALL_HASH_TABLE;
86     tablePtr->numEntries = 0;
87     tablePtr->rebuildSize = CTABLE_SMALL_HASH_TABLE*REBUILD_MULTIPLIER;
88     tablePtr->downShift = 26;
89     tablePtr->mask = 15;
90 }
91 
92 /*
93  * defaultHashKeyProc --
94  *
95  *      Given a pointer of a type with an unknown hash key proc, generate
96  *      a hash for it.
97  */
98 #if defined(__amd64__) || defined(__ia64__)
99 # define defaultHashKeyProc(p) \
100 	((((quad_t)(p)) & 0xFFFFFFFF) ^ (((quad_t)(p)) >> 32))
101 #else
102 # define defaultHashKeyProc(p) ((unsigned int)(p))
103 #endif
104 
105 
106 /*
107  *----------------------------------------------------------------------
108  *
109  * ctable_InitOrStoreHashEntry --
110  *
111  *	Given a hash table with string keys, and a string key, find the entry
112  *	with a matching key. If there is no matching entry, then link up
113  *      the hashEntry provided, or a new one, into the table.
114  *
115  * Results:
116  *	The return value is a pointer to the matching entry. If the entry
117  *	is getting inserted, then *newPtr will be set to a non-zero value;
118  *	otherwise *newPtr will be set to 0. If this is a new entry the value
119  *	stored in the entry will initially be 0.
120  *      if NewPtr is NULL, then
121  *
122  * Side effects:
123  *	A new entry may be added to the hash table.
124  *
125  *----------------------------------------------------------------------
126  */
127 
128 static
129 inline
130 ctable_HashEntry *
ctable_InitOrStoreHashEntry(ctable_HashTable * tablePtr,CONST char * key,ctable_HashEntry * newEntry,int flags,int * newPtr)131 ctable_InitOrStoreHashEntry(
132     ctable_HashTable *tablePtr,	/* Table in which to lookup entry. */
133     CONST char *key,		/* Key to use to find or create matching
134 				 * entry. */
135     ctable_HashEntry *newEntry,	/* if not null, use this entry */
136     int flags,			/* options */
137     int *newPtr)		/* Store info here telling whether a new entry
138 				 * was created. */
139 {
140     ctable_HashEntry *hPtr;
141     unsigned int hash;
142     int index;
143 
144     hash = HashStringKey (tablePtr, (VOID *) key);
145     index = RANDOM_INDEX (tablePtr, hash);
146 
147     /*
148      * Search all of the entries in the appropriate bucket.
149      */
150 
151     for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
152 	    hPtr = hPtr->nextPtr) {
153 
154 	if (hash != (unsigned int)(hPtr->hash)) {
155 	    continue;
156 	}
157 
158 	if (!CompareStringKeys(tablePtr, (VOID *) key, hPtr)) {
159 	    if (newPtr)
160 		*newPtr = 0;
161 	    return hPtr;
162 	}
163     }
164 
165     if (!newPtr)
166 	return NULL;
167 
168     if (newEntry)
169 	hPtr = newEntry;
170     else
171 	return NULL; /* Can't allocate */
172 
173     /*
174      * Entry not found. Add a new one to the bucket.
175      */
176 
177     *newPtr = 1;
178 
179     /* Allocate a new key if needed */
180     if(newEntry && (flags & KEY_MASK) == KEY_VOLATILE) {
181         hPtr->key = (char *) ckalloc (strlen (key) + 1);
182         strcpy (hPtr->key, key);
183     } else {
184 	hPtr->key = (char *)key;
185     }
186 
187     hPtr->hash = hash;
188     hPtr->nextPtr = tablePtr->buckets[index];
189     tablePtr->buckets[index] = hPtr;
190     tablePtr->numEntries++;
191 
192     /*
193      * If the table has exceeded a decent size, rebuild it with many more
194      * buckets.
195      */
196 
197     if (tablePtr->numEntries >= tablePtr->rebuildSize) {
198 	RebuildTable(tablePtr);
199     }
200     return hPtr;
201 }
202 
203 /*
204  *----------------------------------------------------------------------
205  *
206  * ctable_InitHashEntry --
207  *
208  *	Given a hash table with string keys, and a string key, find the entry
209  *	with a matching key. If there is no matching entry, then link up
210  *      a new hashEntry into the table.
211  *
212  * Results:
213  *	The return value is a pointer to the matching entry. If the entry
214  *	is getting inserted, then *newPtr will be set to a non-zero value;
215  *	otherwise *newPtr will be set to 0. If this is a new entry the value
216  *	stored in the entry will initially be 0.
217  *
218  * Side effects:
219  *	A new entry may be added to the hash table.
220  *
221  *----------------------------------------------------------------------
222  */
223 
224 static
225 inline
226 ctable_HashEntry *
ctable_InitHashEntry(ctable_HashTable * tablePtr,CONST char * key,int * newPtr)227 ctable_InitHashEntry(
228     ctable_HashTable *tablePtr,	/* Table in which to lookup entry. */
229     CONST char *key,		/* Key to use to find or create matching
230 				 * entry. */
231     int *newPtr)		/* Store info here telling whether a new entry
232 				 * was created. */
233 {
234     return
235 	ctable_InitOrStoreHashEntry(tablePtr, key, NULL, 0, newPtr);
236 }
237 
238 /*
239  *----------------------------------------------------------------------
240  *
241  * ctable_StoreHashEntry --
242  *
243  *	Given a hash table with string keys, and a string key, find the entry
244  *	with a matching key. If there is no matching entry, then store the
245  *      provided entry into the table.
246  *
247  * Results:
248  *	The return value is a pointer to the matching entry. If the entry
249  *	is getting inserted, then *newPtr will be set to a non-zero value;
250  *	otherwise *newPtr will be set to 0. If this is a new entry the value
251  *	stored in the entry will initially be 0.
252  *
253  * Side effects:
254  *	A new entry may be added to the hash table.
255  *
256  *----------------------------------------------------------------------
257  */
258 
259 static
260 inline
261 ctable_HashEntry *
ctable_StoreHashEntry(ctable_HashTable * tablePtr,CONST char * key,ctable_HashEntry * newEntry,int flags,int * newPtr)262 ctable_StoreHashEntry(
263     ctable_HashTable *tablePtr,	/* Table in which to lookup entry. */
264     CONST char *key,		/* Key to use to find or create matching
265 				 * entry. */
266     ctable_HashEntry *newEntry,	/* if not null, use this entry */
267     int flags,			/* options */
268     int *newPtr)		/* Store info here telling whether a new entry
269 				 * was created. */
270 {
271     return
272 	ctable_InitOrStoreHashEntry(tablePtr, key, newEntry, flags, newPtr);
273 }
274 
275 
276 /*
277  *----------------------------------------------------------------------
278  *
279  * ctable_FindHashEntry --
280  *
281  *	Given a hash table find the entry with a matching key.
282  *
283  * Results:
284  *	The return value is a token for the matching entry in the hash table,
285  *	or NULL if there was no matching entry.
286  *
287  * Side effects:
288  *	None.
289  *
290  *----------------------------------------------------------------------
291  */
292 inline
293 ctable_HashEntry *
ctable_FindHashEntry(ctable_HashTable * tablePtr,CONST char * key)294 ctable_FindHashEntry(
295     ctable_HashTable *tablePtr,	/* Table in which to lookup entry. */
296     CONST char *key)		/* Key to use to find matching entry. */
297 {
298     return ctable_InitHashEntry (tablePtr, key, NULL);
299 }
300 
301 /*
302  *----------------------------------------------------------------------
303  *
304  * ctable_DeleteHashEntry --
305  *
306  *	Remove a single entry from a hash table.
307  *
308  * Results:
309  *	None.
310  *
311  * Side effects:
312  *	The entry given by entryPtr is deleted from its table and should never
313  *	again be used by the caller. It is up to the caller to free the
314  *	clientData field of the entry, if that is relevant.
315  *
316  *----------------------------------------------------------------------
317  */
318 void
ctable_DeleteHashEntry(ctable_HashTable * tablePtr,ctable_HashEntry * entryPtr,char * nullKeyValue)319 ctable_DeleteHashEntry(ctable_HashTable *tablePtr, ctable_HashEntry *entryPtr, char *nullKeyValue)
320 {
321     ctable_HashEntry *prevPtr;
322     ctable_HashEntry **bucketPtr;
323     int index;
324 
325     index = RANDOM_INDEX (tablePtr, entryPtr->hash);
326 
327     bucketPtr = &(tablePtr->buckets[index]);
328 
329     if (*bucketPtr == entryPtr) {
330 	*bucketPtr = entryPtr->nextPtr;
331     } else {
332 	for (prevPtr = *bucketPtr; ; prevPtr = prevPtr->nextPtr) {
333 	    if (prevPtr == NULL) {
334 		Tcl_Panic("malformed bucket chain in ctable_DeleteHashEntry");
335 	    }
336 	    if (prevPtr->nextPtr == entryPtr) {
337 		prevPtr->nextPtr = entryPtr->nextPtr;
338 		break;
339 	    }
340 	}
341     }
342 
343     tablePtr->numEntries--;
344 
345     if(entryPtr->key != nullKeyValue)
346         ckfree (entryPtr->key);
347 }
348 
349 
350 /*
351  *----------------------------------------------------------------------
352  *
353  * ctable_DeleteHashTable --
354  *
355  *	Free up everything associated with a hash table except for the record
356  *	for the table itself.
357  *
358  * Results:
359  *	None.
360  *
361  * Side effects:
362  *	The hash table is no longer useable.
363  *
364  *----------------------------------------------------------------------
365  */
366 
367 void
ctable_DeleteHashTable(ctable_HashTable * tablePtr)368 ctable_DeleteHashTable(ctable_HashTable *tablePtr)	/* Table to delete. */
369 {
370     /*
371      * Free up the bucket array, if it was dynamically allocated.
372      */
373 
374     if (tablePtr->buckets != tablePtr->staticBuckets) {
375 	ckfree((char*)tablePtr->buckets);
376     }
377 }
378 
379 /*
380  *----------------------------------------------------------------------
381  *
382  * ctable_FirstHashEntry --
383  *
384  *	Locate the first entry in a hash table and set up a record that can be
385  *	used to step through all the remaining entries of the table.
386  *
387  * Results:
388  *	The return value is a pointer to the first entry in tablePtr, or NULL
389  *	if tablePtr has no entries in it. The memory at *searchPtr is
390  *	initialized so that subsequent calls to ctable_NextHashEntry will return
391  *	all of the entries in the table, one at a time.
392  *
393  * Side effects:
394  *	None.
395  *
396  *----------------------------------------------------------------------
397  */
398 
399 ctable_HashEntry *
ctable_FirstHashEntry(ctable_HashTable * tablePtr,ctable_HashSearch * searchPtr)400 ctable_FirstHashEntry(
401     ctable_HashTable *tablePtr,	/* Table to search. */
402     ctable_HashSearch *searchPtr)	/* Place to store information about progress
403 				 * through the table. */
404 {
405     searchPtr->tablePtr = tablePtr;
406     searchPtr->nextIndex = 0;
407     searchPtr->nextEntryPtr = NULL;
408     return ctable_NextHashEntry(searchPtr);
409 }
410 
411 /*
412  *----------------------------------------------------------------------
413  *
414  * ctable_NextHashEntry --
415  *
416  *	Once a hash table enumeration has been initiated by calling
417  *	ctable_FirstHashEntry, this function may be called to return successive
418  *	elements of the table.
419  *
420  * Results:
421  *	The return value is the next entry in the hash table being enumerated,
422  *	or NULL if the end of the table is reached.
423  *
424  * Side effects:
425  *	None.
426  *
427  *----------------------------------------------------------------------
428  */
429 
430 ctable_HashEntry *
ctable_NextHashEntry(ctable_HashSearch * searchPtr)431 ctable_NextHashEntry(ctable_HashSearch *searchPtr)
432 				/* Place to store information about progress
433 				 * through the table. Must have been
434 				 * initialized by calling
435 				 * ctable_FirstHashEntry. */
436 {
437     ctable_HashEntry *hPtr;
438     ctable_HashTable *tablePtr = searchPtr->tablePtr;
439 
440     while (searchPtr->nextEntryPtr == NULL) {
441 	if (searchPtr->nextIndex >= tablePtr->numBuckets) {
442 	    return NULL;
443 	}
444 	searchPtr->nextEntryPtr =
445 		tablePtr->buckets[searchPtr->nextIndex];
446 	searchPtr->nextIndex++;
447     }
448     hPtr = searchPtr->nextEntryPtr;
449     searchPtr->nextEntryPtr = hPtr->nextPtr;
450     return hPtr;
451 }
452 
453 /*
454  *----------------------------------------------------------------------
455  *
456  * ctable_HashStats --
457  *
458  *	Return statistics describing the layout of the hash table in its hash
459  *	buckets.
460  *
461  * Results:
462  *	The return value is a malloc-ed string containing information about
463  *	tablePtr. It is the caller's responsibility to free this string.
464  *
465  * Side effects:
466  *	None.
467  *
468  *----------------------------------------------------------------------
469  */
470 
471 CONST char *
ctable_HashStats(ctable_HashTable * tablePtr)472 ctable_HashStats(
473     ctable_HashTable *tablePtr)	/* Table for which to produce stats. */
474 {
475 #define NUM_COUNTERS 10
476     int count[NUM_COUNTERS], overflow, i, j;
477     double average, tmp;
478     ctable_HashEntry *hPtr;
479     char *result, *p;
480 
481     /*
482      * Compute a histogram of bucket usage.
483      */
484 
485     for (i = 0; i < NUM_COUNTERS; i++) {
486 	count[i] = 0;
487     }
488     overflow = 0;
489     average = 0.0;
490     for (i = 0; i < tablePtr->numBuckets; i++) {
491 	j = 0;
492 	for (hPtr = tablePtr->buckets[i]; hPtr != NULL; hPtr = hPtr->nextPtr) {
493 	    j++;
494 	}
495 	if (j < NUM_COUNTERS) {
496 	    count[j]++;
497 	} else {
498 	    overflow++;
499 	}
500 	tmp = j;
501 	if (tablePtr->numEntries != 0) {
502 	    average += (tmp+1.0)*(tmp/tablePtr->numEntries)/2.0;
503 	}
504     }
505 
506     /*
507      * Print out the histogram and a few other pieces of information.
508      */
509 
510     result = (char *) ckalloc((unsigned) (NUM_COUNTERS*60) + 300);
511     sprintf(result, "%d entries in table, %d buckets\n",
512 	    tablePtr->numEntries, tablePtr->numBuckets);
513     p = result + strlen(result);
514     for (i = 0; i < NUM_COUNTERS; i++) {
515 	sprintf(p, "number of buckets with %d entries: %d\n",
516 		i, count[i]);
517 	p += strlen(p);
518     }
519     sprintf(p, "number of buckets with %d or more entries: %d\n",
520 	    NUM_COUNTERS, overflow);
521     p += strlen(p);
522     sprintf(p, "average search distance for entry: %.1f", average);
523     return result;
524 }
525 
526 /*
527  *----------------------------------------------------------------------
528  *
529  * CompareStringKeys --
530  *
531  *	Compares two string keys.
532  *
533  * Results:
534  *	The return value is 0 if they are the same, and -1 if the new key
535  *      is less than the existing key and 1 if the new key is greater
536  *      than the existing key.
537  *
538  * Side effects:
539  *	None.
540  *
541  *----------------------------------------------------------------------
542  */
543 
544 static int
CompareStringKeys(ctable_HashTable * tablePtr,VOID * keyPtr,ctable_HashEntry * hPtr)545 CompareStringKeys(
546     ctable_HashTable *tablePtr,
547     VOID *keyPtr,		/* New key to compare. */
548     ctable_HashEntry *hPtr)	/* Existing key to compare. */
549 {
550     CONST char *p1 = (CONST char *) keyPtr;
551     CONST char *p2 = (CONST char *) hPtr->key;
552 
553 #ifdef CTABLE_COMPARE_HASHES_WITH_STRCMP
554     return strcmp(p1, p2);
555 #else
556     for (;; p1++, p2++) {
557 	if (*p1 != *p2) {
558 	    if (*p1 < *p2) {
559 	        return -1;
560 	    } else {
561 	        return 1;
562 	    }
563 	    break;
564 	}
565 	if (*p1 == '\0') {
566 	    return 0;
567 	}
568     }
569     Tcl_Panic ("code failure in CompareStringKeys - should not be here");
570     return 0;
571 #endif /* CTABLE_COMPARE_HASHES_WITH_STRCMP */
572 }
573 
574 /*
575  *----------------------------------------------------------------------
576  *
577  * HashStringKey --
578  *
579  *	Compute a one-word summary of a text string, which can be used to
580  *	generate a hash index.
581  *
582  * Results:
583  *	The return value is a one-word summary of the information in string.
584  *
585  * Side effects:
586  *	None.
587  *
588  *----------------------------------------------------------------------
589  */
590 
591 static unsigned int
HashStringKey(ctable_HashTable * tablePtr,VOID * keyPtr)592 HashStringKey(
593     ctable_HashTable *tablePtr,	/* Hash table. */
594     VOID *keyPtr)		/* Key from which to compute hash value. */
595 {
596     CONST char *string = (CONST char *) keyPtr;
597     unsigned int result;
598     int c;
599 
600     /*
601      * I tried a zillion different hash functions and asked many other people
602      * for advice. Many people had their own favorite functions, all
603      * different, but no-one had much idea why they were good ones. I chose
604      * the one below (multiply by 9 and add new character) because of the
605      * following reasons:
606      *
607      * 1. Multiplying by 10 is perfect for keys that are decimal strings, and
608      *	  multiplying by 9 is just about as good.
609      * 2. Times-9 is (shift-left-3) plus (old). This means that each
610      *	  character's bits hang around in the low-order bits of the hash value
611      *	  for ever, plus they spread fairly rapidly up to the high-order bits
612      *	  to fill out the hash value. This seems works well both for decimal
613      *	  and non-decimal strings, but isn't strong against maliciously-chosen
614      *	  keys.
615      */
616 
617     result = 0;
618 
619     for (c=*string++ ; c ; c=*string++) {
620 	result += (result<<3) + c;
621     }
622     return result;
623 }
624 
625 /*
626  *----------------------------------------------------------------------
627  *
628  * RebuildTable --
629  *
630  *	This function is invoked when the ratio of entries to hash buckets
631  *	becomes too large. It creates a new table with a larger bucket array
632  *	and moves all of the entries into the new table.
633  *
634  * Results:
635  *	None.
636  *
637  * Side effects:
638  *	Memory gets reallocated and entries get re-hashed to new buckets.
639  *
640  *----------------------------------------------------------------------
641  */
642 
643 static void
RebuildTable(ctable_HashTable * tablePtr)644 RebuildTable(ctable_HashTable *tablePtr)	/* Table to enlarge. */
645 {
646     int oldSize, count, index;
647     ctable_HashEntry **oldBuckets;
648     ctable_HashEntry **oldChainPtr, **newChainPtr;
649     ctable_HashEntry *hPtr;
650 
651     oldSize = tablePtr->numBuckets;
652     oldBuckets = tablePtr->buckets;
653 
654     /*
655      * Allocate and initialize the new bucket array, and set up hashing
656      * constants for new array size.
657      */
658 
659     tablePtr->numBuckets *= 16;
660     tablePtr->buckets = (ctable_HashEntry **) ckalloc((unsigned)
661 	    (tablePtr->numBuckets * sizeof(ctable_HashEntry *)));
662     for (count = tablePtr->numBuckets, newChainPtr = tablePtr->buckets;
663 	    count > 0; count--, newChainPtr++) {
664 	*newChainPtr = NULL;
665     }
666     tablePtr->rebuildSize *= 16;
667     tablePtr->downShift -= 4;
668     tablePtr->mask = (tablePtr->mask << 4) + 15;
669 
670     // printf("rebuilding table from %d buckets to %d buckets\n", oldSize, tablePtr->numBuckets);
671 
672     /*
673      * Rehash all of the existing entries into the new bucket array.
674      */
675 
676     for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) {
677 	for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) {
678 	    *oldChainPtr = hPtr->nextPtr;
679 	    index = RANDOM_INDEX (tablePtr, hPtr->hash);
680 	    hPtr->nextPtr = tablePtr->buckets[index];
681 	    tablePtr->buckets[index] = hPtr;
682 	}
683     }
684 
685     /*
686      * Free up the old bucket array, if it was dynamically allocated.
687      */
688 
689     if (oldBuckets != tablePtr->staticBuckets) {
690 	ckfree((char*)oldBuckets);
691     }
692 
693     // printf("done\n");
694 }
695 
696 
697 /*
698  * Local Variables:
699  * mode: c
700  * c-basic-offset: 4
701  * fill-column: 78
702  * End:
703  */
704