1
2
3 /*
4 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
5 * Coded into C, with minor code improvements, and with hsearch(3) interface,
6 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
7 * also, hcreate/hdestroy routines added to simulate hsearch(3).
8 *
9 * move to local hash table, cleanup abstraction and storage allocation;
10 * convert to ANSII C.
11 * by snc (clark@cme.nbs.gov), 10-Mar-1989
12 *
13 * these routines simulate hsearch(3) and family, with the important
14 * difference that the hash table is dynamic - can grow indefinitely
15 * beyond its original size (as supplied to hcreate()).
16 *
17 * Performance appears to be comparable to that of hsearch(3).
18 * The 'source-code' options referred to in hsearch(3)'s 'man' page
19 * are not implemented; otherwise functionality is identical.
20 *
21 * Compilation controls:
22 * HASH_DEBUG controls some informative traces, mainly for debugging.
23 * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
24 * when combined with HASH_DEBUG, these are displayed by hdestroy().
25 *
26 * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
27 * concatenation property, in probably unnecessary code 'optimisation'.
28 *
29 * snc: fixed concatenation for ANSII C, 10-Mar-1989
30 *
31 * $Log: hash.c,v $
32 * Revision 1.8 1997/10/22 16:36:49 sauderd
33 * Changed the use and definitions of the compiler macros MUL and DIV. They
34 * seem to be defined and used only within hash.h and hash.c
35 *
36 * Revision 1.7 1997/01/21 19:19:51 dar
37 * made C++ compatible
38 *
39 * Revision 1.6 1994/03/23 20:06:51 libes
40 * botched free in hash destroy
41 *
42 * Revision 1.5 1993/10/15 18:49:55 libes
43 * CADDETC certified
44 *
45 * Revision 1.4 1992/08/18 18:27:10 libes
46 * changed DEBUG to HASH_DEBUG
47 *
48 * Revision 1.3 1992/08/18 17:16:22 libes
49 * rm'd extraneous error messages
50 *
51 * Revision 1.2 1992/05/31 08:37:18 libes
52 * multiple files
53 *
54 * Revision 1.1 1992/05/28 03:56:55 libes
55 * Initial revision
56 *
57 * Revision 1.4 1992/05/14 10:15:04 libes
58 * don't remember
59 *
60 * Revision 1.3 1992/02/12 07:06:49 libes
61 * y
62 * y
63 * do sub/supertype
64 *
65 * Revision 1.2 1992/02/09 00:51:02 libes
66 * does ref/use correctly
67 *
68 * Revision 1.1 1992/02/05 08:34:24 libes
69 * Initial revision
70 *
71 * Revision 1.0.1.1 1992/01/22 02:40:04 libes
72 * copied from ~pdes
73 *
74 * Revision 1.9 1992/01/22 02:26:58 libes
75 * added code to test hash package
76 *
77 * Revision 1.8 1991/09/17 02:53:23 libes
78 * fixed bug in HASHcopy (new hash tables were coming out empty)
79 *
80 * Revision 1.7 1991/08/30 16:34:36 libes
81 * fixed HASHlist to use state from parameter instead of static
82 *
83 * Revision 1.6 1991/08/13 22:19:39 libes
84 * forgot to declare s, s2, pp, and q. Oops.
85 *
86 * Revision 1.5 1991/08/06 19:03:07 libes
87 * fixed bugs relating to my previous addition (delete function)
88 * added HASHcopy to support DICT_copy via OBJcopy
89 *
90 * Revision 1.4 1991/07/19 03:57:46 libes
91 * added action HASH_DELETE
92 * made action HASH_INSERT return failure upon duplicate entry
93 *
94 * Revision 1.3 1991/02/26 19:40:45 libes
95 * Added functions for visiting every member of the hash table, to duplicate
96 * similar functionality of linked lists. Note that current implementation
97 * can only walk through one hash table at a time. This can be improved, but
98 * the application code doesn't currently need such a feature.
99 *
100 * Revision 1.2 1990/09/06 11:06:56 clark
101 * BPR 2.1 alpha
102 *
103 * Revision 1.1 90/06/11 16:44:43 clark
104 * Initial revision
105 *
106 */
107
108 #include <sc_memmgr.h>
109 #include <assert.h>
110 #include <string.h>
111 #include <stdlib.h>
112 #include "express/hash.h"
113
114 struct freelist_head HASH_Table_fl;
115 struct freelist_head HASH_Element_fl;
116
117 /*
118 ** Internal routines
119 */
120
121 static_inline Address HASHhash( char *, Hash_Table );
122 static void HASHexpand_table( Hash_Table );
123
124 /*
125 ** Local data
126 */
127
128 # ifdef HASH_STATISTICS
129 static long HashAccesses, HashCollisions;
130 # endif
131
132 /*
133 ** Code
134 */
135
136 void
HASHinitialize()137 HASHinitialize() {
138 if( HASH_Table_fl.size_elt == 0 ) {
139 MEMinitialize( &HASH_Table_fl, sizeof( struct Hash_Table_ ), 50, 50 );
140 }
141 if( HASH_Element_fl.size_elt == 0 ) {
142 MEMinitialize( &HASH_Element_fl, sizeof( struct Element_ ), 500, 100 );
143 }
144 }
145
146 Hash_Table
HASHcreate(unsigned count)147 HASHcreate( unsigned count ) {
148 unsigned i;
149 Hash_Table table;
150
151 /*
152 ** Adjust Count to be nearest higher power of 2,
153 ** minimum SEGMENT_SIZE, then convert into segments.
154 */
155 i = SEGMENT_SIZE;
156 while( i < count ) {
157 i <<= 1;
158 }
159 count = DIV( i, SEGMENT_SIZE_SHIFT );
160
161 table = HASH_Table_new();
162 #if 0
163 table->in_use = 0;
164 #endif
165 table->SegmentCount = table->p = table->KeyCount = 0;
166 /*
167 ** Allocate initial 'i' segments of buckets
168 */
169 for( i = 0; i < count; i++ )
170 CALLOC( table->Directory[i], SEGMENT_SIZE, Element )
171 /*, "segment in HASHcreate");*/
172
173 table->SegmentCount = count;
174 table->maxp = MUL( count, SEGMENT_SIZE_SHIFT );
175 table->MinLoadFactor = 1;
176 table->MaxLoadFactor = MAX_LOAD_FACTOR;
177 # ifdef HASH_DEBUG
178 fprintf( stderr,
179 "[HASHcreate] table %x count %d maxp %d SegmentCount %d\n",
180 table,
181 count,
182 table->maxp,
183 table->SegmentCount );
184 # endif
185 # ifdef HASH_STATISTICS
186 HashAccesses = HashCollisions = 0;
187 # endif
188 return( table );
189 }
190
191 /* initialize pointer to beginning of hash table so we can step through it */
192 /* on repeated calls to HASHlist - DEL */
193 void
HASHlistinit(Hash_Table table,HashEntry * he)194 HASHlistinit( Hash_Table table, HashEntry * he ) {
195 he->i = he->j = 0;
196 he->p = 0;
197 he->table = table;
198 he->type = '*';
199 #if 0
200 table->in_use = 1;
201 #endif
202 }
203
204 void
HASHlistinit_by_type(Hash_Table table,HashEntry * he,char type)205 HASHlistinit_by_type( Hash_Table table, HashEntry * he, char type ) {
206 he->i = he->j = 0;
207 he->p = 0;
208 he->table = table;
209 he->type = type;
210 #if 0
211 table->in_use = 1;
212 #endif
213 }
214
215 #if 0
216 /* if you don't step to the end, you can clear the flag this way */
217 void
218 HASHlistend( HashEntry * he ) {
219 he->table->in_use = 0;
220 }
221 #endif
222
223 /* provide a way to step through the hash */
224 Element
HASHlist(HashEntry * he)225 HASHlist( HashEntry * he ) {
226 int i2 = he->i;
227 int j2 = he->j;
228 Segment s;
229
230 he->e = 0;
231
232 for( he->i = i2; he->i < he->table->SegmentCount; he->i++ ) {
233 /* test probably unnecessary */
234 if( ( s = he->table->Directory[he->i] ) != NULL ) {
235 for( he->j = j2; he->j < SEGMENT_SIZE; he->j++ ) {
236 if( !he->p ) {
237 he->p = s[he->j];
238 }
239
240 /* if he->p is defined, prepare to return it (by
241 setting it to he->e) and begin looking for a new value
242 for he->p
243 */
244 retry:
245 if( he->p ) {
246 if( ( he->type != '*' ) &&
247 ( he->type != he->p->type ) ) {
248 he->p = he->p->next;
249 goto retry;
250 }
251 if( he->e ) {
252 return( he->e );
253 }
254 he->e = he->p;
255 he->p = he->p->next;
256 }
257
258 /* avoid incrementing he->j by returning here */
259 if( he->p ) {
260 return( he->e );
261 }
262 }
263 j2 = 0;
264 }
265 }
266 /* if he->e was set then it is last one */
267 #if 0
268 he->table->in_use = 0;
269 #endif
270 return( he->e );
271 }
272
273 #if 0
274 /* this verifies no one else is walking through the table that we might screw up */
275 /* it should be called before adding, deleting or destroying a table */
276 HASH_in_use( Hash_Table table, char * action ) {
277 fprintf( stderr, "HASH: attempted to %s but hash table in use\n", action );
278 }
279 #endif
280
281 void
HASHdestroy(Hash_Table table)282 HASHdestroy( Hash_Table table ) {
283 Segment s;
284 Element p, q;
285
286 if( table != HASH_NULL ) {
287 unsigned int i, j;
288 #if 0
289 if( table->in_use ) {
290 HASH_in_use( table, "destroy hash table" );
291 }
292 #endif
293 for( i = 0; i < table->SegmentCount; i++ ) {
294 /* test probably unnecessary */
295 if( ( s = table->Directory[i] ) != NULL ) {
296 for( j = 0; j < SEGMENT_SIZE; j++ ) {
297 p = s[j];
298 while( p != NULL ) {
299 q = p->next;
300 HASH_Element_destroy( p );
301 p = q;
302 }
303 }
304 sc_free( table->Directory[i] );
305 }
306 }
307 HASH_Table_destroy( table );
308 # if defined(HASH_STATISTICS) && defined(HASH_DEBUG)
309 fprintf( stderr,
310 "[hdestroy] Accesses %ld Collisions %ld\n",
311 HashAccesses,
312 HashCollisions );
313 # endif
314 }
315 }
316
317 Element
HASHsearch(Hash_Table table,Element item,Action action)318 HASHsearch( Hash_Table table, Element item, Action action ) {
319 Address h;
320 Segment CurrentSegment;
321 int SegmentIndex;
322 int SegmentDir;
323 Segment p;
324 Element q;
325 Element deleteme;
326
327 assert( table != HASH_NULL ); /* Kinder really than return(NULL); */
328 # ifdef HASH_STATISTICS
329 HashAccesses++;
330 # endif
331 h = HASHhash( item->key, table );
332 SegmentDir = DIV( h, SEGMENT_SIZE_SHIFT );
333 SegmentIndex = MOD( h, SEGMENT_SIZE );
334 /*
335 ** valid segment ensured by HASHhash()
336 */
337 CurrentSegment = table->Directory[SegmentDir];
338 assert( CurrentSegment != NULL ); /* bad failure if tripped */
339 p = CurrentSegment + SegmentIndex;
340 q = *p;
341 /*
342 ** Follow collision chain
343 ** Now and after we finish this loop
344 ** p = &element, and
345 ** q = element
346 */
347 while( q != NULL && strcmp( q->key, item->key ) ) {
348 p = &q->next;
349 q = *p;
350 # ifdef HASH_STATISTICS
351 HashCollisions++;
352 # endif
353 }
354 /* at this point, we have either found the element or it doesn't exist */
355 switch( action ) {
356 case HASH_FIND:
357 return( ( Element )q );
358 case HASH_DELETE:
359 if( !q ) {
360 return( 0 );
361 }
362 /* at this point, element exists and action == DELETE */
363 #if 0
364 if( table->in_use ) {
365 HASH_in_use( table, "insert element" );
366 }
367 #endif
368 deleteme = q;
369 *p = q->next;
370 /*STRINGfree(deleteme->key);*/
371 HASH_Element_destroy( deleteme );
372 --table->KeyCount;
373 return( deleteme ); /* of course, user shouldn't deref this! */
374 case HASH_INSERT:
375 /* if trying to insert it (twice), let them know */
376 if( q != NULL ) {
377 return( q ); /* was return(0);!!!!!?!?! */
378 }
379
380 #if 0
381 if( table->in_use ) {
382 HASH_in_use( table, "delete element" );
383 }
384 #endif
385 /* at this point, element does not exist and action == INSERT */
386 q = HASH_Element_new();
387 *p = q; /* link into chain */
388 /*
389 ** Initialize new element
390 */
391 /* I don't see the point of copying the key!!!! */
392 /* q->key = STRINGcopy(item->key);*/
393 q->key = item->key;
394 q->data = item->data;
395 q->symbol = item->symbol;
396 q->type = item->type;
397 q->next = NULL;
398 /*
399 ** table over-full?
400 */
401 if( ++table->KeyCount / MUL( table->SegmentCount, SEGMENT_SIZE_SHIFT ) > table->MaxLoadFactor ) {
402 HASHexpand_table( table ); /* doesn't affect q */
403 }
404 }
405 return( ( Element )0 ); /* was return (Element)q */
406 }
407
408 /*
409 ** Internal routines
410 */
411
HASHhash(char * Key,Hash_Table table)412 static_inline Address HASHhash( char * Key, Hash_Table table ) {
413 Address h, address;
414 register unsigned char * k = ( unsigned char * )Key;
415
416 h = 0;
417 /*
418 ** Convert string to integer
419 */
420 /*SUPPRESS 112*/
421 assert( Key );
422 while( *k )
423 /*SUPPRESS 8*/ { /*SUPPRESS 112*/
424 h = h * PRIME1 ^ ( *k++ - ' ' );
425 }
426 h %= PRIME2;
427 address = MOD( h, table->maxp );
428 if( address < table->p ) {
429 address = MOD( h, ( table->maxp << 1 ) ); /* h % (2*table->maxp) */
430 }
431 return( address );
432 }
433
HASHexpand_table(Hash_Table table)434 static void HASHexpand_table( Hash_Table table ) {
435 Segment OldSegment, NewSegment;
436 Element Current, *Previous, *LastOfNew;
437
438 if( table->maxp + table->p < MUL( DIRECTORY_SIZE, SEGMENT_SIZE_SHIFT ) ) {
439 Address NewAddress;
440 int OldSegmentIndex, NewSegmentIndex;
441 int OldSegmentDir, NewSegmentDir;
442 /*
443 ** Locate the bucket to be split
444 */
445 OldSegmentDir = DIV( table->p, SEGMENT_SIZE_SHIFT );
446 OldSegment = table->Directory[OldSegmentDir];
447 OldSegmentIndex = MOD( table->p, SEGMENT_SIZE );
448 /*
449 ** Expand address space; if necessary create a new segment
450 */
451 NewAddress = table->maxp + table->p;
452 NewSegmentDir = DIV( NewAddress, SEGMENT_SIZE_SHIFT );
453 NewSegmentIndex = MOD( NewAddress, SEGMENT_SIZE );
454 if( NewSegmentIndex == 0 ) {
455 CALLOC( table->Directory[NewSegmentDir], SEGMENT_SIZE, Element );
456 }
457 /* "segment in HASHexpand_table");*/
458 NewSegment = table->Directory[NewSegmentDir];
459 /*
460 ** Adjust state variables
461 */
462 table->p++;
463 if( table->p == table->maxp ) {
464 table->maxp <<= 1; /* table->maxp *= 2 */
465 table->p = 0;
466 }
467 table->SegmentCount++;
468 /*
469 ** Relocate records to the new bucket
470 */
471 Previous = &OldSegment[OldSegmentIndex];
472 Current = *Previous;
473 LastOfNew = &NewSegment[NewSegmentIndex];
474 *LastOfNew = NULL;
475 while( Current != NULL ) {
476 if( HASHhash( Current->key, table ) == NewAddress ) {
477 /*
478 ** Attach it to the end of the new chain
479 */
480 *LastOfNew = Current;
481 /*
482 ** Remove it from old chain
483 */
484 *Previous = Current->next;
485 LastOfNew = &Current->next;
486 Current = Current->next;
487 *LastOfNew = NULL;
488 } else {
489 /*
490 ** leave it on the old chain
491 */
492 Previous = &Current->next;
493 Current = Current->next;
494 }
495 }
496 }
497 }
498
499 /* make a complete copy of a hash table */
500 /* Note that individual objects are shallow-copied. OBJcopy is not called! */
501 /* But then, it isn't called when objects are inserted/deleted so this seems */
502 /* reasonable - DEL */
503 Hash_Table
HASHcopy(Hash_Table oldtable)504 HASHcopy( Hash_Table oldtable ) {
505 Hash_Table newtable;
506 Segment s, s2;
507 Element * pp; /* old element */
508 Element * qq; /* new element */
509 unsigned int i, j;
510
511 newtable = HASH_Table_new();
512 for( i = 0; i < oldtable->SegmentCount; i++ ) {
513 CALLOC( newtable->Directory[i], SEGMENT_SIZE, Element );
514 /* "segment in HASHcopy");*/
515 }
516
517 newtable->p = oldtable->p;
518 newtable->SegmentCount = oldtable->SegmentCount;
519 newtable->maxp = oldtable->maxp;
520 newtable->MinLoadFactor = oldtable->MinLoadFactor;
521 newtable->MaxLoadFactor = oldtable->MaxLoadFactor;
522 newtable->KeyCount = oldtable->KeyCount;
523
524 for( i = 0; i < oldtable->SegmentCount; i++ ) {
525 /* test probably unnecessary */
526 if( ( s = oldtable->Directory[i] ) != NULL ) {
527 s2 = newtable->Directory[i];
528 for( j = 0; j < SEGMENT_SIZE; j++ ) {
529 qq = &s2[j];
530 for( pp = &s[j]; *pp; pp = &( *pp )->next ) {
531 *qq = HASH_Element_new();
532 /* (*qq)->key = STRINGcopy((*pp)->key);*/
533 /* I really doubt it is necessary to copy the key!!! */
534 ( *qq )->key = ( *pp )->key;
535 ( *qq )->data = ( *pp )->data;
536 ( *qq )->symbol = ( *pp )->symbol;
537 ( *qq )->type = ( *pp )->type;
538 ( *qq )->next = NULL;
539 qq = &( ( *qq )->next );
540 }
541 }
542 }
543 }
544 return( newtable );
545 }
546
547 /* following code is for testing hash package */
548 #ifdef HASHTEST
549 struct Element e1, e2, e3, *e;
550 struct Hash_Table * t;
551 HashEntry he;
552
main()553 main() {
554 e1.key = "foo";
555 e1.data = ( char * )1;
556 e2.key = "bar";
557 e2.data = ( char * )2;
558 e3.key = "herschel";
559 e3.data = ( char * )3;
560
561 t = HASHcreate( 100 );
562 e = HASHsearch( t, &e1, HASH_INSERT );
563 e = HASHsearch( t, &e2, HASH_INSERT );
564 e = HASHsearch( t, &e3, HASH_INSERT );
565 HASHlistinit( t, &he );
566 for( ;; ) {
567 e = HASHlist( &he );
568 if( !e ) {
569 exit( 0 );
570 }
571 fprintf( stderr, "found key %s, data %d\n", e->key, ( int )e->data );
572 }
573 }
574 #endif
575