1 /* hashlib.c -- functions to manage and access hash tables for bash. */
2 
3 /* Copyright (C) 1987,1989,1991,1995,1998,2001,2003,2005,2006,2008,2009 Free Software Foundation, Inc.
4 
5    This file is part of GNU Bash, the Bourne Again SHell.
6 
7    Bash is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11 
12    Bash is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with Bash.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <config.h>
22 
23 #include "bashansi.h"
24 
25 #if defined (HAVE_UNISTD_H)
26 #  ifdef _MINIX
27 #    include <sys/types.h>
28 #  endif
29 #  include <unistd.h>
30 #endif
31 
32 #include <stdio.h>
33 
34 #include "shell.h"
35 #include "hashlib.h"
36 
37 /* tunable constants for rehashing */
38 #define HASH_REHASH_MULTIPLIER	4
39 #define HASH_REHASH_FACTOR	2
40 
41 #define HASH_SHOULDGROW(table) \
42   ((table)->nentries >= (table)->nbuckets * HASH_REHASH_FACTOR)
43 
44 /* an initial approximation */
45 #define HASH_SHOULDSHRINK(table) \
46   (((table)->nbuckets > DEFAULT_HASH_BUCKETS) && \
47    ((table)->nentries < (table)->nbuckets / HASH_REHASH_MULTIPLIER))
48 
49 /* Rely on properties of unsigned division (unsigned/int -> unsigned) and
50    don't discard the upper 32 bits of the value, if present. */
51 #define HASH_BUCKET(s, t, h) (((h) = hash_string (s)) & ((t)->nbuckets - 1))
52 
53 static BUCKET_CONTENTS *copy_bucket_array PARAMS((BUCKET_CONTENTS *, sh_string_func_t *));
54 
55 static void hash_rehash PARAMS((HASH_TABLE *, int));
56 static void hash_grow PARAMS((HASH_TABLE *));
57 static void hash_shrink PARAMS((HASH_TABLE *));
58 
59 /* Make a new hash table with BUCKETS number of buckets.  Initialize
60    each slot in the table to NULL. */
61 HASH_TABLE *
hash_create(buckets)62 hash_create (buckets)
63      int buckets;
64 {
65   HASH_TABLE *new_table;
66   register int i;
67 
68   new_table = (HASH_TABLE *)xmalloc (sizeof (HASH_TABLE));
69   if (buckets == 0)
70     buckets = DEFAULT_HASH_BUCKETS;
71 
72   new_table->bucket_array =
73     (BUCKET_CONTENTS **)xmalloc (buckets * sizeof (BUCKET_CONTENTS *));
74   new_table->nbuckets = buckets;
75   new_table->nentries = 0;
76 
77   for (i = 0; i < buckets; i++)
78     new_table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
79 
80   return (new_table);
81 }
82 
83 int
hash_size(table)84 hash_size (table)
85      HASH_TABLE *table;
86 {
87   return (HASH_ENTRIES(table));
88 }
89 
90 static BUCKET_CONTENTS *
copy_bucket_array(ba,cpdata)91 copy_bucket_array (ba, cpdata)
92      BUCKET_CONTENTS *ba;
93      sh_string_func_t *cpdata;	/* data copy function */
94 {
95   BUCKET_CONTENTS *new_bucket, *n, *e;
96 
97   if (ba == 0)
98     return ((BUCKET_CONTENTS *)0);
99 
100   for (n = (BUCKET_CONTENTS *)0, e = ba; e; e = e->next)
101     {
102       if (n == 0)
103         {
104           new_bucket = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
105           n = new_bucket;
106         }
107       else
108         {
109           n->next = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
110           n = n->next;
111         }
112 
113       n->key = savestring (e->key);
114       n->data = e->data ? (cpdata ? (*cpdata) (e->data) : savestring (e->data))
115 			: NULL;
116       n->khash = e->khash;
117       n->times_found = e->times_found;
118       n->next = (BUCKET_CONTENTS *)NULL;
119     }
120 
121   return new_bucket;
122 }
123 
124 static void
hash_rehash(table,nsize)125 hash_rehash (table, nsize)
126      HASH_TABLE *table;
127      int nsize;
128 {
129   int osize, i, j;
130   BUCKET_CONTENTS **old_bucket_array, *item, *next;
131 
132   if (table == NULL || nsize == table->nbuckets)
133     return;
134 
135   osize = table->nbuckets;
136   old_bucket_array = table->bucket_array;
137 
138   table->nbuckets = nsize;
139   table->bucket_array = (BUCKET_CONTENTS **)xmalloc (table->nbuckets * sizeof (BUCKET_CONTENTS *));
140   for (i = 0; i < table->nbuckets; i++)
141     table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
142 
143   for (j = 0; j < osize; j++)
144     {
145       for (item = old_bucket_array[j]; item; item = next)
146 	{
147 	  next = item->next;
148 	  i = item->khash & (table->nbuckets - 1);
149 	  item->next = table->bucket_array[i];
150 	  table->bucket_array[i] = item;
151 	}
152     }
153 
154   free (old_bucket_array);
155 }
156 
157 static void
hash_grow(table)158 hash_grow (table)
159      HASH_TABLE *table;
160 {
161   int nsize;
162 
163   nsize = table->nbuckets * HASH_REHASH_MULTIPLIER;
164   if (nsize > 0)		/* overflow */
165     hash_rehash (table, nsize);
166 }
167 
168 static void
hash_shrink(table)169 hash_shrink (table)
170      HASH_TABLE *table;
171 {
172   int nsize;
173 
174   nsize = table->nbuckets / HASH_REHASH_MULTIPLIER;
175   hash_rehash (table, nsize);
176 }
177 
178 HASH_TABLE *
hash_copy(table,cpdata)179 hash_copy (table, cpdata)
180      HASH_TABLE *table;
181      sh_string_func_t *cpdata;
182 {
183   HASH_TABLE *new_table;
184   int i;
185 
186   if (table == 0)
187     return ((HASH_TABLE *)NULL);
188 
189   new_table = hash_create (table->nbuckets);
190 
191   for (i = 0; i < table->nbuckets; i++)
192     new_table->bucket_array[i] = copy_bucket_array (table->bucket_array[i], cpdata);
193 
194   new_table->nentries = table->nentries;
195   return new_table;
196 }
197 
198 /* This is the best 32-bit string hash function I found. It's one of the
199    Fowler-Noll-Vo family (FNV-1).
200 
201    The magic is in the interesting relationship between the special prime
202    16777619 (2^24 + 403) and 2^32 and 2^8. */
203 
204 #define FNV_OFFSET 2166136261
205 #define FNV_PRIME 16777619
206 
207 /* If you want to use 64 bits, use
208 FNV_OFFSET	14695981039346656037
209 FNV_PRIME	1099511628211
210 */
211 
212 /* The `khash' check below requires that strings that compare equally with
213    strcmp hash to the same value. */
214 unsigned int
hash_string(s)215 hash_string (s)
216      const char *s;
217 {
218   register unsigned int i;
219 
220   for (i = FNV_OFFSET; *s; s++)
221     {
222       /* FNV-1a has the XOR first, traditional FNV-1 has the multiply first */
223 
224       /* was i *= FNV_PRIME */
225       i += (i<<1) + (i<<4) + (i<<7) + (i<<8) + (i<<24);
226       i ^= *s;
227     }
228 
229   return i;
230 }
231 
232 /* Return the location of the bucket which should contain the data
233    for STRING.  TABLE is a pointer to a HASH_TABLE. */
234 
235 int
hash_bucket(string,table)236 hash_bucket (string, table)
237      const char *string;
238      HASH_TABLE *table;
239 {
240   unsigned int h;
241 
242   return (HASH_BUCKET (string, table, h));
243 }
244 
245 /* Return a pointer to the hashed item.  If the HASH_CREATE flag is passed,
246    create a new hash table entry for STRING, otherwise return NULL. */
247 BUCKET_CONTENTS *
hash_search(string,table,flags)248 hash_search (string, table, flags)
249      const char *string;
250      HASH_TABLE *table;
251      int flags;
252 {
253   BUCKET_CONTENTS *list;
254   int bucket;
255   unsigned int hv;
256 
257   if (table == 0 || ((flags & HASH_CREATE) == 0 && HASH_ENTRIES (table) == 0))
258     return (BUCKET_CONTENTS *)NULL;
259 
260   bucket = HASH_BUCKET (string, table, hv);
261 
262   for (list = table->bucket_array ? table->bucket_array[bucket] : 0; list; list = list->next)
263     {
264       /* This is the comparison function */
265       if (hv == list->khash && STREQ (list->key, string))
266 	{
267 	  list->times_found++;
268 	  return (list);
269 	}
270     }
271 
272   if (flags & HASH_CREATE)
273     {
274       if (HASH_SHOULDGROW (table))
275 	{
276 	  hash_grow (table);
277 	  bucket = HASH_BUCKET (string, table, hv);
278 	}
279 
280       list = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
281       list->next = table->bucket_array[bucket];
282       table->bucket_array[bucket] = list;
283 
284       list->data = NULL;
285       list->key = (char *)string;	/* XXX fix later */
286       list->khash = hv;
287       list->times_found = 0;
288 
289       table->nentries++;
290       return (list);
291     }
292 
293   return (BUCKET_CONTENTS *)NULL;
294 }
295 
296 /* Remove the item specified by STRING from the hash table TABLE.
297    The item removed is returned, so you can free its contents.  If
298    the item isn't in this table NULL is returned. */
299 BUCKET_CONTENTS *
hash_remove(string,table,flags)300 hash_remove (string, table, flags)
301      const char *string;
302      HASH_TABLE *table;
303      int flags;
304 {
305   int bucket;
306   BUCKET_CONTENTS *prev, *temp;
307   unsigned int hv;
308 
309   if (table == 0 || HASH_ENTRIES (table) == 0)
310     return (BUCKET_CONTENTS *)NULL;
311 
312   bucket = HASH_BUCKET (string, table, hv);
313   prev = (BUCKET_CONTENTS *)NULL;
314   for (temp = table->bucket_array[bucket]; temp; temp = temp->next)
315     {
316       if (hv == temp->khash && STREQ (temp->key, string))
317 	{
318 	  if (prev)
319 	    prev->next = temp->next;
320 	  else
321 	    table->bucket_array[bucket] = temp->next;
322 
323 	  table->nentries--;
324 	  return (temp);
325 	}
326       prev = temp;
327     }
328   return ((BUCKET_CONTENTS *) NULL);
329 }
330 
331 /* Create an entry for STRING, in TABLE.  If the entry already
332    exists, then return it (unless the HASH_NOSRCH flag is set). */
333 BUCKET_CONTENTS *
hash_insert(string,table,flags)334 hash_insert (string, table, flags)
335      char *string;
336      HASH_TABLE *table;
337      int flags;
338 {
339   BUCKET_CONTENTS *item;
340   int bucket;
341   unsigned int hv;
342 
343   if (table == 0)
344     table = hash_create (0);
345 
346   item = (flags & HASH_NOSRCH) ? (BUCKET_CONTENTS *)NULL
347   			       : hash_search (string, table, 0);
348 
349   if (item == 0)
350     {
351       if (HASH_SHOULDGROW (table))
352 	hash_grow (table);
353 
354       bucket = HASH_BUCKET (string, table, hv);
355 
356       item = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
357       item->next = table->bucket_array[bucket];
358       table->bucket_array[bucket] = item;
359 
360       item->data = NULL;
361       item->key = string;
362       item->khash = hv;
363       item->times_found = 0;
364 
365       table->nentries++;
366     }
367 
368   return (item);
369 }
370 
371 /* Remove and discard all entries in TABLE.  If FREE_DATA is non-null, it
372    is a function to call to dispose of a hash item's data.  Otherwise,
373    free() is called. */
374 void
hash_flush(table,free_data)375 hash_flush (table, free_data)
376      HASH_TABLE *table;
377      sh_free_func_t *free_data;
378 {
379   int i;
380   register BUCKET_CONTENTS *bucket, *item;
381 
382   if (table == 0 || HASH_ENTRIES (table) == 0)
383     return;
384 
385   for (i = 0; i < table->nbuckets; i++)
386     {
387       bucket = table->bucket_array[i];
388 
389       while (bucket)
390 	{
391 	  item = bucket;
392 	  bucket = bucket->next;
393 
394 	  if (free_data)
395 	    (*free_data) (item->data);
396 	  else
397 	    free (item->data);
398 	  free (item->key);
399 	  free (item);
400 	}
401       table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
402     }
403 
404   table->nentries = 0;
405 }
406 
407 /* Free the hash table pointed to by TABLE. */
408 void
hash_dispose(table)409 hash_dispose (table)
410      HASH_TABLE *table;
411 {
412   free (table->bucket_array);
413   free (table);
414 }
415 
416 void
hash_walk(table,func)417 hash_walk (table, func)
418      HASH_TABLE *table;
419      hash_wfunc *func;
420 {
421   register int i;
422   BUCKET_CONTENTS *item;
423 
424   if (table == 0 || HASH_ENTRIES (table) == 0)
425     return;
426 
427   for (i = 0; i < table->nbuckets; i++)
428     {
429       for (item = hash_items (i, table); item; item = item->next)
430 	if ((*func) (item) < 0)
431 	  return;
432     }
433 }
434 
435 #if defined (DEBUG) || defined (TEST_HASHING)
436 void
hash_pstats(table,name)437 hash_pstats (table, name)
438      HASH_TABLE *table;
439      char *name;
440 {
441   register int slot, bcount;
442   register BUCKET_CONTENTS *bc;
443 
444   if (name == 0)
445     name = "unknown hash table";
446 
447   fprintf (stderr, "%s: %d buckets; %d items\n", name, table->nbuckets, table->nentries);
448 
449   /* Print out a count of how many strings hashed to each bucket, so we can
450      see how even the distribution is. */
451   for (slot = 0; slot < table->nbuckets; slot++)
452     {
453       bc = hash_items (slot, table);
454 
455       fprintf (stderr, "\tslot %3d: ", slot);
456       for (bcount = 0; bc; bc = bc->next)
457 	bcount++;
458 
459       fprintf (stderr, "%d\n", bcount);
460     }
461 }
462 #endif
463 
464 #ifdef TEST_HASHING
465 
466 /* link with xmalloc.o and lib/malloc/libmalloc.a */
467 #undef NULL
468 #include <stdio.h>
469 
470 #ifndef NULL
471 #define NULL 0
472 #endif
473 
474 HASH_TABLE *table, *ntable;
475 
476 int interrupt_immediately = 0;
477 int running_trap = 0;
478 
479 int
signal_is_trapped(s)480 signal_is_trapped (s)
481      int s;
482 {
483   return (0);
484 }
485 
486 void
programming_error(const char * format,...)487 programming_error (const char *format, ...)
488 {
489   abort();
490 }
491 
492 void
fatal_error(const char * format,...)493 fatal_error (const char *format, ...)
494 {
495   abort();
496 }
497 
498 void
internal_warning(const char * format,...)499 internal_warning (const char *format, ...)
500 {
501 }
502 
503 int
main()504 main ()
505 {
506   char string[256];
507   int count = 0;
508   BUCKET_CONTENTS *tt;
509 
510 #if defined (TEST_NBUCKETS)
511   table = hash_create (TEST_NBUCKETS);
512 #else
513   table = hash_create (0);
514 #endif
515 
516   for (;;)
517     {
518       char *temp_string;
519       if (fgets (string, sizeof (string), stdin) == 0)
520 	break;
521       if (!*string)
522 	break;
523       temp_string = savestring (string);
524       tt = hash_insert (temp_string, table, 0);
525       if (tt->times_found)
526 	{
527 	  fprintf (stderr, "You have already added item `%s'\n", string);
528 	  free (temp_string);
529 	}
530       else
531 	{
532 	  count++;
533 	}
534     }
535 
536   hash_pstats (table, "hash test");
537 
538   ntable = hash_copy (table, (sh_string_func_t *)NULL);
539   hash_flush (table, (sh_free_func_t *)NULL);
540   hash_pstats (ntable, "hash copy test");
541 
542   exit (0);
543 }
544 
545 #endif /* TEST_HASHING */
546