1 /* hashlib.c -- functions to manage and access hash tables for bash. */
2
3 /* Copyright (C) 1987,1989,1991,1995,1998,2001,2003,2005,2006,2008,2009 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <config.h>
22
23 #include "bashansi.h"
24
25 #if defined (HAVE_UNISTD_H)
26 # ifdef _MINIX
27 # include <sys/types.h>
28 # endif
29 # include <unistd.h>
30 #endif
31
32 #include <stdio.h>
33
34 #include "shell.h"
35 #include "hashlib.h"
36
37 /* tunable constants for rehashing */
38 #define HASH_REHASH_MULTIPLIER 4
39 #define HASH_REHASH_FACTOR 2
40
41 #define HASH_SHOULDGROW(table) \
42 ((table)->nentries >= (table)->nbuckets * HASH_REHASH_FACTOR)
43
44 /* an initial approximation */
45 #define HASH_SHOULDSHRINK(table) \
46 (((table)->nbuckets > DEFAULT_HASH_BUCKETS) && \
47 ((table)->nentries < (table)->nbuckets / HASH_REHASH_MULTIPLIER))
48
49 /* Rely on properties of unsigned division (unsigned/int -> unsigned) and
50 don't discard the upper 32 bits of the value, if present. */
51 #define HASH_BUCKET(s, t, h) (((h) = hash_string (s)) & ((t)->nbuckets - 1))
52
53 static BUCKET_CONTENTS *copy_bucket_array PARAMS((BUCKET_CONTENTS *, sh_string_func_t *));
54
55 static void hash_rehash PARAMS((HASH_TABLE *, int));
56 static void hash_grow PARAMS((HASH_TABLE *));
57 static void hash_shrink PARAMS((HASH_TABLE *));
58
59 /* Make a new hash table with BUCKETS number of buckets. Initialize
60 each slot in the table to NULL. */
61 HASH_TABLE *
hash_create(buckets)62 hash_create (buckets)
63 int buckets;
64 {
65 HASH_TABLE *new_table;
66 register int i;
67
68 new_table = (HASH_TABLE *)xmalloc (sizeof (HASH_TABLE));
69 if (buckets == 0)
70 buckets = DEFAULT_HASH_BUCKETS;
71
72 new_table->bucket_array =
73 (BUCKET_CONTENTS **)xmalloc (buckets * sizeof (BUCKET_CONTENTS *));
74 new_table->nbuckets = buckets;
75 new_table->nentries = 0;
76
77 for (i = 0; i < buckets; i++)
78 new_table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
79
80 return (new_table);
81 }
82
83 int
hash_size(table)84 hash_size (table)
85 HASH_TABLE *table;
86 {
87 return (HASH_ENTRIES(table));
88 }
89
90 static BUCKET_CONTENTS *
copy_bucket_array(ba,cpdata)91 copy_bucket_array (ba, cpdata)
92 BUCKET_CONTENTS *ba;
93 sh_string_func_t *cpdata; /* data copy function */
94 {
95 BUCKET_CONTENTS *new_bucket, *n, *e;
96
97 if (ba == 0)
98 return ((BUCKET_CONTENTS *)0);
99
100 for (n = (BUCKET_CONTENTS *)0, e = ba; e; e = e->next)
101 {
102 if (n == 0)
103 {
104 new_bucket = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
105 n = new_bucket;
106 }
107 else
108 {
109 n->next = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
110 n = n->next;
111 }
112
113 n->key = savestring (e->key);
114 n->data = e->data ? (cpdata ? (*cpdata) (e->data) : savestring (e->data))
115 : NULL;
116 n->khash = e->khash;
117 n->times_found = e->times_found;
118 n->next = (BUCKET_CONTENTS *)NULL;
119 }
120
121 return new_bucket;
122 }
123
124 static void
hash_rehash(table,nsize)125 hash_rehash (table, nsize)
126 HASH_TABLE *table;
127 int nsize;
128 {
129 int osize, i, j;
130 BUCKET_CONTENTS **old_bucket_array, *item, *next;
131
132 if (table == NULL || nsize == table->nbuckets)
133 return;
134
135 osize = table->nbuckets;
136 old_bucket_array = table->bucket_array;
137
138 table->nbuckets = nsize;
139 table->bucket_array = (BUCKET_CONTENTS **)xmalloc (table->nbuckets * sizeof (BUCKET_CONTENTS *));
140 for (i = 0; i < table->nbuckets; i++)
141 table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
142
143 for (j = 0; j < osize; j++)
144 {
145 for (item = old_bucket_array[j]; item; item = next)
146 {
147 next = item->next;
148 i = item->khash & (table->nbuckets - 1);
149 item->next = table->bucket_array[i];
150 table->bucket_array[i] = item;
151 }
152 }
153
154 free (old_bucket_array);
155 }
156
157 static void
hash_grow(table)158 hash_grow (table)
159 HASH_TABLE *table;
160 {
161 int nsize;
162
163 nsize = table->nbuckets * HASH_REHASH_MULTIPLIER;
164 if (nsize > 0) /* overflow */
165 hash_rehash (table, nsize);
166 }
167
168 static void
hash_shrink(table)169 hash_shrink (table)
170 HASH_TABLE *table;
171 {
172 int nsize;
173
174 nsize = table->nbuckets / HASH_REHASH_MULTIPLIER;
175 hash_rehash (table, nsize);
176 }
177
178 HASH_TABLE *
hash_copy(table,cpdata)179 hash_copy (table, cpdata)
180 HASH_TABLE *table;
181 sh_string_func_t *cpdata;
182 {
183 HASH_TABLE *new_table;
184 int i;
185
186 if (table == 0)
187 return ((HASH_TABLE *)NULL);
188
189 new_table = hash_create (table->nbuckets);
190
191 for (i = 0; i < table->nbuckets; i++)
192 new_table->bucket_array[i] = copy_bucket_array (table->bucket_array[i], cpdata);
193
194 new_table->nentries = table->nentries;
195 return new_table;
196 }
197
198 /* This is the best 32-bit string hash function I found. It's one of the
199 Fowler-Noll-Vo family (FNV-1).
200
201 The magic is in the interesting relationship between the special prime
202 16777619 (2^24 + 403) and 2^32 and 2^8. */
203
204 #define FNV_OFFSET 2166136261
205 #define FNV_PRIME 16777619
206
207 /* If you want to use 64 bits, use
208 FNV_OFFSET 14695981039346656037
209 FNV_PRIME 1099511628211
210 */
211
212 /* The `khash' check below requires that strings that compare equally with
213 strcmp hash to the same value. */
214 unsigned int
hash_string(s)215 hash_string (s)
216 const char *s;
217 {
218 register unsigned int i;
219
220 for (i = FNV_OFFSET; *s; s++)
221 {
222 /* FNV-1a has the XOR first, traditional FNV-1 has the multiply first */
223
224 /* was i *= FNV_PRIME */
225 i += (i<<1) + (i<<4) + (i<<7) + (i<<8) + (i<<24);
226 i ^= *s;
227 }
228
229 return i;
230 }
231
232 /* Return the location of the bucket which should contain the data
233 for STRING. TABLE is a pointer to a HASH_TABLE. */
234
235 int
hash_bucket(string,table)236 hash_bucket (string, table)
237 const char *string;
238 HASH_TABLE *table;
239 {
240 unsigned int h;
241
242 return (HASH_BUCKET (string, table, h));
243 }
244
245 /* Return a pointer to the hashed item. If the HASH_CREATE flag is passed,
246 create a new hash table entry for STRING, otherwise return NULL. */
247 BUCKET_CONTENTS *
hash_search(string,table,flags)248 hash_search (string, table, flags)
249 const char *string;
250 HASH_TABLE *table;
251 int flags;
252 {
253 BUCKET_CONTENTS *list;
254 int bucket;
255 unsigned int hv;
256
257 if (table == 0 || ((flags & HASH_CREATE) == 0 && HASH_ENTRIES (table) == 0))
258 return (BUCKET_CONTENTS *)NULL;
259
260 bucket = HASH_BUCKET (string, table, hv);
261
262 for (list = table->bucket_array ? table->bucket_array[bucket] : 0; list; list = list->next)
263 {
264 /* This is the comparison function */
265 if (hv == list->khash && STREQ (list->key, string))
266 {
267 list->times_found++;
268 return (list);
269 }
270 }
271
272 if (flags & HASH_CREATE)
273 {
274 if (HASH_SHOULDGROW (table))
275 {
276 hash_grow (table);
277 bucket = HASH_BUCKET (string, table, hv);
278 }
279
280 list = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
281 list->next = table->bucket_array[bucket];
282 table->bucket_array[bucket] = list;
283
284 list->data = NULL;
285 list->key = (char *)string; /* XXX fix later */
286 list->khash = hv;
287 list->times_found = 0;
288
289 table->nentries++;
290 return (list);
291 }
292
293 return (BUCKET_CONTENTS *)NULL;
294 }
295
296 /* Remove the item specified by STRING from the hash table TABLE.
297 The item removed is returned, so you can free its contents. If
298 the item isn't in this table NULL is returned. */
299 BUCKET_CONTENTS *
hash_remove(string,table,flags)300 hash_remove (string, table, flags)
301 const char *string;
302 HASH_TABLE *table;
303 int flags;
304 {
305 int bucket;
306 BUCKET_CONTENTS *prev, *temp;
307 unsigned int hv;
308
309 if (table == 0 || HASH_ENTRIES (table) == 0)
310 return (BUCKET_CONTENTS *)NULL;
311
312 bucket = HASH_BUCKET (string, table, hv);
313 prev = (BUCKET_CONTENTS *)NULL;
314 for (temp = table->bucket_array[bucket]; temp; temp = temp->next)
315 {
316 if (hv == temp->khash && STREQ (temp->key, string))
317 {
318 if (prev)
319 prev->next = temp->next;
320 else
321 table->bucket_array[bucket] = temp->next;
322
323 table->nentries--;
324 return (temp);
325 }
326 prev = temp;
327 }
328 return ((BUCKET_CONTENTS *) NULL);
329 }
330
331 /* Create an entry for STRING, in TABLE. If the entry already
332 exists, then return it (unless the HASH_NOSRCH flag is set). */
333 BUCKET_CONTENTS *
hash_insert(string,table,flags)334 hash_insert (string, table, flags)
335 char *string;
336 HASH_TABLE *table;
337 int flags;
338 {
339 BUCKET_CONTENTS *item;
340 int bucket;
341 unsigned int hv;
342
343 if (table == 0)
344 table = hash_create (0);
345
346 item = (flags & HASH_NOSRCH) ? (BUCKET_CONTENTS *)NULL
347 : hash_search (string, table, 0);
348
349 if (item == 0)
350 {
351 if (HASH_SHOULDGROW (table))
352 hash_grow (table);
353
354 bucket = HASH_BUCKET (string, table, hv);
355
356 item = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
357 item->next = table->bucket_array[bucket];
358 table->bucket_array[bucket] = item;
359
360 item->data = NULL;
361 item->key = string;
362 item->khash = hv;
363 item->times_found = 0;
364
365 table->nentries++;
366 }
367
368 return (item);
369 }
370
371 /* Remove and discard all entries in TABLE. If FREE_DATA is non-null, it
372 is a function to call to dispose of a hash item's data. Otherwise,
373 free() is called. */
374 void
hash_flush(table,free_data)375 hash_flush (table, free_data)
376 HASH_TABLE *table;
377 sh_free_func_t *free_data;
378 {
379 int i;
380 register BUCKET_CONTENTS *bucket, *item;
381
382 if (table == 0 || HASH_ENTRIES (table) == 0)
383 return;
384
385 for (i = 0; i < table->nbuckets; i++)
386 {
387 bucket = table->bucket_array[i];
388
389 while (bucket)
390 {
391 item = bucket;
392 bucket = bucket->next;
393
394 if (free_data)
395 (*free_data) (item->data);
396 else
397 free (item->data);
398 free (item->key);
399 free (item);
400 }
401 table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
402 }
403
404 table->nentries = 0;
405 }
406
407 /* Free the hash table pointed to by TABLE. */
408 void
hash_dispose(table)409 hash_dispose (table)
410 HASH_TABLE *table;
411 {
412 free (table->bucket_array);
413 free (table);
414 }
415
416 void
hash_walk(table,func)417 hash_walk (table, func)
418 HASH_TABLE *table;
419 hash_wfunc *func;
420 {
421 register int i;
422 BUCKET_CONTENTS *item;
423
424 if (table == 0 || HASH_ENTRIES (table) == 0)
425 return;
426
427 for (i = 0; i < table->nbuckets; i++)
428 {
429 for (item = hash_items (i, table); item; item = item->next)
430 if ((*func) (item) < 0)
431 return;
432 }
433 }
434
435 #if defined (DEBUG) || defined (TEST_HASHING)
436 void
hash_pstats(table,name)437 hash_pstats (table, name)
438 HASH_TABLE *table;
439 char *name;
440 {
441 register int slot, bcount;
442 register BUCKET_CONTENTS *bc;
443
444 if (name == 0)
445 name = "unknown hash table";
446
447 fprintf (stderr, "%s: %d buckets; %d items\n", name, table->nbuckets, table->nentries);
448
449 /* Print out a count of how many strings hashed to each bucket, so we can
450 see how even the distribution is. */
451 for (slot = 0; slot < table->nbuckets; slot++)
452 {
453 bc = hash_items (slot, table);
454
455 fprintf (stderr, "\tslot %3d: ", slot);
456 for (bcount = 0; bc; bc = bc->next)
457 bcount++;
458
459 fprintf (stderr, "%d\n", bcount);
460 }
461 }
462 #endif
463
464 #ifdef TEST_HASHING
465
466 /* link with xmalloc.o and lib/malloc/libmalloc.a */
467 #undef NULL
468 #include <stdio.h>
469
470 #ifndef NULL
471 #define NULL 0
472 #endif
473
474 HASH_TABLE *table, *ntable;
475
476 int interrupt_immediately = 0;
477 int running_trap = 0;
478
479 int
signal_is_trapped(s)480 signal_is_trapped (s)
481 int s;
482 {
483 return (0);
484 }
485
486 void
programming_error(const char * format,...)487 programming_error (const char *format, ...)
488 {
489 abort();
490 }
491
492 void
fatal_error(const char * format,...)493 fatal_error (const char *format, ...)
494 {
495 abort();
496 }
497
498 void
internal_warning(const char * format,...)499 internal_warning (const char *format, ...)
500 {
501 }
502
503 int
main()504 main ()
505 {
506 char string[256];
507 int count = 0;
508 BUCKET_CONTENTS *tt;
509
510 #if defined (TEST_NBUCKETS)
511 table = hash_create (TEST_NBUCKETS);
512 #else
513 table = hash_create (0);
514 #endif
515
516 for (;;)
517 {
518 char *temp_string;
519 if (fgets (string, sizeof (string), stdin) == 0)
520 break;
521 if (!*string)
522 break;
523 temp_string = savestring (string);
524 tt = hash_insert (temp_string, table, 0);
525 if (tt->times_found)
526 {
527 fprintf (stderr, "You have already added item `%s'\n", string);
528 free (temp_string);
529 }
530 else
531 {
532 count++;
533 }
534 }
535
536 hash_pstats (table, "hash test");
537
538 ntable = hash_copy (table, (sh_string_func_t *)NULL);
539 hash_flush (table, (sh_free_func_t *)NULL);
540 hash_pstats (ntable, "hash copy test");
541
542 exit (0);
543 }
544
545 #endif /* TEST_HASHING */
546