1 
2 
3 /*
4  * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
5  * Coded into C, with minor code improvements, and with hsearch(3) interface,
6  * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
7  * also, hcreate/hdestroy routines added to simulate hsearch(3).
8  *
9  * move to local hash table, cleanup abstraction and storage allocation;
10  * convert to ANSII C.
11  * by snc (clark@cme.nbs.gov), 10-Mar-1989
12  *
13  * these routines simulate hsearch(3) and family, with the important
14  * difference that the hash table is dynamic - can grow indefinitely
15  * beyond its original size (as supplied to hcreate()).
16  *
17  * Performance appears to be comparable to that of hsearch(3).
18  * The 'source-code' options referred to in hsearch(3)'s 'man' page
19  * are not implemented; otherwise functionality is identical.
20  *
21  * Compilation controls:
22  * HASH_DEBUG controls some informative traces, mainly for debugging.
23  * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
24  * when combined with HASH_DEBUG, these are displayed by hdestroy().
25  *
26  * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
27  * concatenation property, in probably unnecessary code 'optimisation'.
28  *
29  * snc: fixed concatenation for ANSII C, 10-Mar-1989
30  *
31  * $Log: hash.c,v $
32  * Revision 1.8  1997/10/22 16:36:49  sauderd
33  * Changed the use and definitions of the compiler macros MUL and DIV. They
34  * seem to be defined and used only within hash.h and hash.c
35  *
36  * Revision 1.7  1997/01/21 19:19:51  dar
37  * made C++ compatible
38  *
39  * Revision 1.6  1994/03/23  20:06:51  libes
40  * botched free in hash destroy
41  *
42  * Revision 1.5  1993/10/15  18:49:55  libes
43  * CADDETC certified
44  *
45  * Revision 1.4  1992/08/18  18:27:10  libes
46  * changed DEBUG to HASH_DEBUG
47  *
48  * Revision 1.3  1992/08/18  17:16:22  libes
49  * rm'd extraneous error messages
50  *
51  * Revision 1.2  1992/05/31  08:37:18  libes
52  * multiple files
53  *
54  * Revision 1.1  1992/05/28  03:56:55  libes
55  * Initial revision
56  *
57  * Revision 1.4  1992/05/14  10:15:04  libes
58  * don't remember
59  *
60  * Revision 1.3  1992/02/12  07:06:49  libes
61  * y
62  * y
63  * do sub/supertype
64  *
65  * Revision 1.2  1992/02/09  00:51:02  libes
66  * does ref/use correctly
67  *
68  * Revision 1.1  1992/02/05  08:34:24  libes
69  * Initial revision
70  *
71  * Revision 1.0.1.1  1992/01/22  02:40:04  libes
72  * copied from ~pdes
73  *
74  * Revision 1.9  1992/01/22  02:26:58  libes
75  * added code to test hash package
76  *
77  * Revision 1.8  1991/09/17  02:53:23  libes
78  * fixed bug in HASHcopy (new hash tables were coming out empty)
79  *
80  * Revision 1.7  1991/08/30  16:34:36  libes
81  * fixed HASHlist to use state from parameter instead of static
82  *
83  * Revision 1.6  1991/08/13  22:19:39  libes
84  * forgot to declare s, s2, pp, and q.  Oops.
85  *
86  * Revision 1.5  1991/08/06  19:03:07  libes
87  * fixed bugs relating to my previous addition (delete function)
88  * added HASHcopy to support DICT_copy via OBJcopy
89  *
90  * Revision 1.4  1991/07/19  03:57:46  libes
91  * added action HASH_DELETE
92  * made action HASH_INSERT return failure upon duplicate entry
93  *
94  * Revision 1.3  1991/02/26  19:40:45  libes
95  * Added functions for visiting every member of the hash table, to duplicate
96  * similar functionality of linked lists.  Note that current implementation
97  * can only walk through one hash table at a time.  This can be improved, but
98  * the application code doesn't currently need such a feature.
99  *
100  * Revision 1.2  1990/09/06  11:06:56  clark
101  * BPR 2.1 alpha
102  *
103  * Revision 1.1  90/06/11  16:44:43  clark
104  * Initial revision
105  *
106  */
107 
108 #include <sc_memmgr.h>
109 #include <assert.h>
110 #include <string.h>
111 #include <stdlib.h>
112 #include "express/hash.h"
113 
114 struct freelist_head HASH_Table_fl;
115 struct freelist_head HASH_Element_fl;
116 
117 /*
118 ** Internal routines
119 */
120 
121 static_inline Address   HASHhash( char *, Hash_Table );
122 static void     HASHexpand_table( Hash_Table );
123 
124 /*
125 ** Local data
126 */
127 
128 # ifdef HASH_STATISTICS
129 static long     HashAccesses, HashCollisions;
130 # endif
131 
132 /*
133 ** Code
134 */
135 
136 void
HASHinitialize()137 HASHinitialize() {
138     if( HASH_Table_fl.size_elt == 0 ) {
139         MEMinitialize( &HASH_Table_fl, sizeof( struct Hash_Table_ ), 50, 50 );
140     }
141     if( HASH_Element_fl.size_elt == 0 ) {
142         MEMinitialize( &HASH_Element_fl, sizeof( struct Element_ ), 500, 100 );
143     }
144 }
145 
146 Hash_Table
HASHcreate(unsigned count)147 HASHcreate( unsigned count ) {
148     unsigned    i;
149     Hash_Table  table;
150 
151     /*
152     ** Adjust Count to be nearest higher power of 2,
153     ** minimum SEGMENT_SIZE, then convert into segments.
154     */
155     i = SEGMENT_SIZE;
156     while( i < count ) {
157         i <<= 1;
158     }
159     count = DIV( i, SEGMENT_SIZE_SHIFT );
160 
161     table = HASH_Table_new();
162 #if 0
163     table->in_use = 0;
164 #endif
165     table->SegmentCount = table->p = table->KeyCount = 0;
166     /*
167     ** Allocate initial 'i' segments of buckets
168     */
169     for( i = 0; i < count; i++ )
170         CALLOC( table->Directory[i], SEGMENT_SIZE, Element )
171         /*,   "segment in HASHcreate");*/
172 
173         table->SegmentCount = count;
174     table->maxp = MUL( count, SEGMENT_SIZE_SHIFT );
175     table->MinLoadFactor = 1;
176     table->MaxLoadFactor = MAX_LOAD_FACTOR;
177 # ifdef HASH_DEBUG
178     fprintf( stderr,
179              "[HASHcreate] table %x count %d maxp %d SegmentCount %d\n",
180              table,
181              count,
182              table->maxp,
183              table->SegmentCount );
184 # endif
185 # ifdef HASH_STATISTICS
186     HashAccesses = HashCollisions = 0;
187 # endif
188     return( table );
189 }
190 
191 /* initialize pointer to beginning of hash table so we can step through it */
192 /* on repeated calls to HASHlist - DEL */
193 void
HASHlistinit(Hash_Table table,HashEntry * he)194 HASHlistinit( Hash_Table table, HashEntry * he ) {
195     he->i = he->j = 0;
196     he->p = 0;
197     he->table = table;
198     he->type = '*';
199 #if 0
200     table->in_use = 1;
201 #endif
202 }
203 
204 void
HASHlistinit_by_type(Hash_Table table,HashEntry * he,char type)205 HASHlistinit_by_type( Hash_Table table, HashEntry * he, char type ) {
206     he->i = he->j = 0;
207     he->p = 0;
208     he->table = table;
209     he->type = type;
210 #if 0
211     table->in_use = 1;
212 #endif
213 }
214 
215 #if 0
216 /* if you don't step to the end, you can clear the flag this way */
217 void
218 HASHlistend( HashEntry * he ) {
219     he->table->in_use = 0;
220 }
221 #endif
222 
223 /* provide a way to step through the hash */
224 Element
HASHlist(HashEntry * he)225 HASHlist( HashEntry * he ) {
226     int i2 = he->i;
227     int j2 = he->j;
228     Segment s;
229 
230     he->e = 0;
231 
232     for( he->i = i2; he->i < he->table->SegmentCount; he->i++ ) {
233         /* test probably unnecessary    */
234         if( ( s = he->table->Directory[he->i] ) != NULL ) {
235             for( he->j = j2; he->j < SEGMENT_SIZE; he->j++ ) {
236                 if( !he->p ) {
237                     he->p = s[he->j];
238                 }
239 
240                 /* if he->p is defined, prepare to return it (by
241                    setting it to he->e) and begin looking for a new value
242                    for he->p
243                  */
244 retry:
245                 if( he->p ) {
246                     if( ( he->type != '*' ) &&
247                             ( he->type != he->p->type ) ) {
248                         he->p = he->p->next;
249                         goto retry;
250                     }
251                     if( he->e ) {
252                         return( he->e );
253                     }
254                     he->e = he->p;
255                     he->p = he->p->next;
256                 }
257 
258                 /* avoid incrementing he->j by returning here */
259                 if( he->p ) {
260                     return( he->e );
261                 }
262             }
263             j2 = 0;
264         }
265     }
266     /* if he->e was set then it is last one */
267 #if 0
268     he->table->in_use = 0;
269 #endif
270     return( he->e );
271 }
272 
273 #if 0
274 /* this verifies no one else is walking through the table that we might screw up */
275 /* it should be called before adding, deleting or destroying a table */
276 HASH_in_use( Hash_Table table, char * action ) {
277     fprintf( stderr, "HASH: attempted to %s but hash table in use\n", action );
278 }
279 #endif
280 
281 void
HASHdestroy(Hash_Table table)282 HASHdestroy( Hash_Table table ) {
283     Segment s;
284     Element p, q;
285 
286     if( table != HASH_NULL ) {
287         unsigned int i, j;
288 #if 0
289         if( table->in_use ) {
290             HASH_in_use( table, "destroy hash table" );
291         }
292 #endif
293         for( i = 0; i < table->SegmentCount; i++ ) {
294             /* test probably unnecessary    */
295             if( ( s = table->Directory[i] ) != NULL ) {
296                 for( j = 0; j < SEGMENT_SIZE; j++ ) {
297                     p = s[j];
298                     while( p != NULL ) {
299                         q = p->next;
300                         HASH_Element_destroy( p );
301                         p = q;
302                     }
303                 }
304                 sc_free( table->Directory[i] );
305             }
306         }
307         HASH_Table_destroy( table );
308 # if defined(HASH_STATISTICS) && defined(HASH_DEBUG)
309         fprintf( stderr,
310                  "[hdestroy] Accesses %ld Collisions %ld\n",
311                  HashAccesses,
312                  HashCollisions );
313 # endif
314     }
315 }
316 
317 Element
HASHsearch(Hash_Table table,Element item,Action action)318 HASHsearch( Hash_Table table, Element item, Action action ) {
319     Address h;
320     Segment CurrentSegment;
321     int     SegmentIndex;
322     int     SegmentDir;
323     Segment p;
324     Element q;
325     Element deleteme;
326 
327     assert( table != HASH_NULL ); /* Kinder really than return(NULL); */
328 # ifdef HASH_STATISTICS
329     HashAccesses++;
330 # endif
331     h = HASHhash( item->key, table );
332     SegmentDir = DIV( h, SEGMENT_SIZE_SHIFT );
333     SegmentIndex = MOD( h, SEGMENT_SIZE );
334     /*
335     ** valid segment ensured by HASHhash()
336     */
337     CurrentSegment = table->Directory[SegmentDir];
338     assert( CurrentSegment != NULL ); /* bad failure if tripped   */
339     p = CurrentSegment + SegmentIndex;
340     q = *p;
341     /*
342     ** Follow collision chain
343     ** Now and after we finish this loop
344     **  p = &element, and
345     **  q = element
346     */
347     while( q != NULL && strcmp( q->key, item->key ) ) {
348         p = &q->next;
349         q = *p;
350 # ifdef HASH_STATISTICS
351         HashCollisions++;
352 # endif
353     }
354     /* at this point, we have either found the element or it doesn't exist */
355     switch( action ) {
356         case HASH_FIND:
357             return( ( Element )q );
358         case HASH_DELETE:
359             if( !q ) {
360                 return( 0 );
361             }
362             /* at this point, element exists and action == DELETE */
363 #if 0
364             if( table->in_use ) {
365                 HASH_in_use( table, "insert element" );
366             }
367 #endif
368             deleteme = q;
369             *p = q->next;
370             /*STRINGfree(deleteme->key);*/
371             HASH_Element_destroy( deleteme );
372             --table->KeyCount;
373             return( deleteme ); /* of course, user shouldn't deref this! */
374         case HASH_INSERT:
375             /* if trying to insert it (twice), let them know */
376             if( q != NULL ) {
377                 return( q );    /* was return(0);!!!!!?!?! */
378             }
379 
380 #if 0
381             if( table->in_use ) {
382                 HASH_in_use( table, "delete element" );
383             }
384 #endif
385             /* at this point, element does not exist and action == INSERT */
386             q = HASH_Element_new();
387             *p = q;             /* link into chain  */
388             /*
389             ** Initialize new element
390             */
391             /* I don't see the point of copying the key!!!! */
392             /*    q->key = STRINGcopy(item->key);*/
393             q->key = item->key;
394             q->data = item->data;
395             q->symbol = item->symbol;
396             q->type = item->type;
397             q->next = NULL;
398             /*
399             ** table over-full?
400             */
401             if( ++table->KeyCount / MUL( table->SegmentCount, SEGMENT_SIZE_SHIFT ) > table->MaxLoadFactor ) {
402                 HASHexpand_table( table );    /* doesn't affect q   */
403             }
404     }
405     return( ( Element )0 ); /* was return (Element)q */
406 }
407 
408 /*
409 ** Internal routines
410 */
411 
HASHhash(char * Key,Hash_Table table)412 static_inline Address HASHhash( char * Key, Hash_Table table ) {
413     Address     h, address;
414     register unsigned char * k = ( unsigned char * )Key;
415 
416     h = 0;
417     /*
418     ** Convert string to integer
419     */
420     /*SUPPRESS 112*/
421     assert( Key );
422     while( *k )
423         /*SUPPRESS 8*/ { /*SUPPRESS 112*/
424         h = h * PRIME1 ^ ( *k++ - ' ' );
425     }
426     h %= PRIME2;
427     address = MOD( h, table->maxp );
428     if( address < table->p ) {
429         address = MOD( h, ( table->maxp << 1 ) );    /* h % (2*table->maxp) */
430     }
431     return( address );
432 }
433 
HASHexpand_table(Hash_Table table)434 static void HASHexpand_table( Hash_Table table ) {
435     Segment OldSegment, NewSegment;
436     Element Current, *Previous, *LastOfNew;
437 
438     if( table->maxp + table->p < MUL( DIRECTORY_SIZE, SEGMENT_SIZE_SHIFT ) ) {
439         Address NewAddress;
440         int OldSegmentIndex, NewSegmentIndex;
441         int OldSegmentDir, NewSegmentDir;
442         /*
443         ** Locate the bucket to be split
444         */
445         OldSegmentDir = DIV( table->p, SEGMENT_SIZE_SHIFT );
446         OldSegment = table->Directory[OldSegmentDir];
447         OldSegmentIndex = MOD( table->p, SEGMENT_SIZE );
448         /*
449         ** Expand address space; if necessary create a new segment
450         */
451         NewAddress = table->maxp + table->p;
452         NewSegmentDir = DIV( NewAddress, SEGMENT_SIZE_SHIFT );
453         NewSegmentIndex = MOD( NewAddress, SEGMENT_SIZE );
454         if( NewSegmentIndex == 0 ) {
455             CALLOC( table->Directory[NewSegmentDir], SEGMENT_SIZE, Element );
456         }
457         /*  "segment in HASHexpand_table");*/
458         NewSegment = table->Directory[NewSegmentDir];
459         /*
460         ** Adjust state variables
461         */
462         table->p++;
463         if( table->p == table->maxp ) {
464             table->maxp <<= 1;  /* table->maxp *= 2 */
465             table->p = 0;
466         }
467         table->SegmentCount++;
468         /*
469         ** Relocate records to the new bucket
470         */
471         Previous = &OldSegment[OldSegmentIndex];
472         Current = *Previous;
473         LastOfNew = &NewSegment[NewSegmentIndex];
474         *LastOfNew = NULL;
475         while( Current != NULL ) {
476             if( HASHhash( Current->key, table ) == NewAddress ) {
477                 /*
478                 ** Attach it to the end of the new chain
479                 */
480                 *LastOfNew = Current;
481                 /*
482                 ** Remove it from old chain
483                 */
484                 *Previous = Current->next;
485                 LastOfNew = &Current->next;
486                 Current = Current->next;
487                 *LastOfNew = NULL;
488             } else {
489                 /*
490                 ** leave it on the old chain
491                 */
492                 Previous = &Current->next;
493                 Current = Current->next;
494             }
495         }
496     }
497 }
498 
499 /* make a complete copy of a hash table */
500 /* Note that individual objects are shallow-copied.  OBJcopy is not called! */
501 /* But then, it isn't called when objects are inserted/deleted so this seems */
502 /* reasonable - DEL */
503 Hash_Table
HASHcopy(Hash_Table oldtable)504 HASHcopy( Hash_Table oldtable ) {
505     Hash_Table newtable;
506     Segment s, s2;
507     Element * pp;   /* old element */
508     Element * qq;   /* new element */
509     unsigned int i, j;
510 
511     newtable = HASH_Table_new();
512     for( i = 0; i < oldtable->SegmentCount; i++ ) {
513         CALLOC( newtable->Directory[i], SEGMENT_SIZE, Element );
514         /*    "segment in HASHcopy");*/
515     }
516 
517     newtable->p     = oldtable->p;
518     newtable->SegmentCount  = oldtable->SegmentCount;
519     newtable->maxp      = oldtable->maxp;
520     newtable->MinLoadFactor = oldtable->MinLoadFactor;
521     newtable->MaxLoadFactor = oldtable->MaxLoadFactor;
522     newtable->KeyCount  = oldtable->KeyCount;
523 
524     for( i = 0; i < oldtable->SegmentCount; i++ ) {
525         /* test probably unnecessary */
526         if( ( s = oldtable->Directory[i] ) != NULL ) {
527             s2 = newtable->Directory[i];
528             for( j = 0; j < SEGMENT_SIZE; j++ ) {
529                 qq = &s2[j];
530                 for( pp = &s[j]; *pp; pp = &( *pp )->next ) {
531                     *qq = HASH_Element_new();
532                     /*      (*qq)->key = STRINGcopy((*pp)->key);*/
533                     /* I really doubt it is necessary to copy the key!!! */
534                     ( *qq )->key = ( *pp )->key;
535                     ( *qq )->data = ( *pp )->data;
536                     ( *qq )->symbol = ( *pp )->symbol;
537                     ( *qq )->type = ( *pp )->type;
538                     ( *qq )->next = NULL;
539                     qq = &( ( *qq )->next );
540                 }
541             }
542         }
543     }
544     return( newtable );
545 }
546 
547 /* following code is for testing hash package */
548 #ifdef HASHTEST
549 struct Element e1, e2, e3, *e;
550 struct Hash_Table * t;
551 HashEntry he;
552 
main()553 main() {
554     e1.key = "foo";
555     e1.data = ( char * )1;
556     e2.key = "bar";
557     e2.data = ( char * )2;
558     e3.key = "herschel";
559     e3.data = ( char * )3;
560 
561     t = HASHcreate( 100 );
562     e = HASHsearch( t, &e1, HASH_INSERT );
563     e = HASHsearch( t, &e2, HASH_INSERT );
564     e = HASHsearch( t, &e3, HASH_INSERT );
565     HASHlistinit( t, &he );
566     for( ;; ) {
567         e = HASHlist( &he );
568         if( !e ) {
569             exit( 0 );
570         }
571         fprintf( stderr, "found key %s, data %d\n", e->key, ( int )e->data );
572     }
573 }
574 #endif
575