1 /* hash.c -- hash table maintenance
2 Copyright (C) 1995, 1999, 2002, 2010 Free Software Foundation, Inc.
3 Written by Greg McGary <gkm@gnu.org> <greg@mcgary.org>
4 
5 GNU Make is free software; you can redistribute it and/or modify it under the
6 terms of the GNU General Public License as published by the Free Software
7 Foundation; either version 3 of the License, or (at your option) any later
8 version.
9 
10 GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program.  If not, see <http://www.gnu.org/licenses/>.  */
16 
17 #include "make.h"
18 #include "hash.h"
19 #ifdef CONFIG_WITH_STRCACHE2
20 # include <assert.h>
21 #endif
22 
23 
24 /*#define	CALLOC(t, n) ((t *) calloc (sizeof (t), (n)))*/
25 #define	CALLOC(t, n) ((t *) xcalloc (sizeof (t) * (n)))
26 #define MALLOC(t, n) ((t *) xmalloc (sizeof (t) * (n)))
27 #define REALLOC(o, t, n) ((t *) xrealloc ((o), sizeof (t) * (n)))
28 #define CLONE(o, t, n) ((t *) memcpy (MALLOC (t, (n)), (o), sizeof (t) * (n)))
29 
30 static void hash_rehash __P((struct hash_table* ht));
31 static unsigned long round_up_2 __P((unsigned long rough));
32 
33 /* Implement double hashing with open addressing.  The table size is
34    always a power of two.  The secondary (`increment') hash function
35    is forced to return an odd-value, in order to be relatively prime
36    to the table size.  This guarantees that the increment can
37    potentially hit every slot in the table during collision
38    resolution.  */
39 
40 void *hash_deleted_item = &hash_deleted_item;
41 
42 /* Force the table size to be a power of two, possibly rounding up the
43    given size.  */
44 
45 void
hash_init(struct hash_table * ht,unsigned long size,hash_func_t hash_1,hash_func_t hash_2,hash_cmp_func_t hash_cmp)46 hash_init (struct hash_table *ht, unsigned long size,
47            hash_func_t hash_1, hash_func_t hash_2, hash_cmp_func_t hash_cmp)
48 {
49   ht->ht_size = round_up_2 (size);
50   ht->ht_empty_slots = ht->ht_size;
51   ht->ht_vec = (void**) CALLOC (struct token *, ht->ht_size);
52   if (ht->ht_vec == 0)
53     {
54       fprintf (stderr, _("can't allocate %lu bytes for hash table: memory exhausted"),
55 	       ht->ht_size * (unsigned long) sizeof (struct token *));
56       exit (1);
57     }
58 
59   ht->ht_capacity = ht->ht_size - (ht->ht_size / 16); /* 93.75% loading factor */
60   ht->ht_fill = 0;
61   ht->ht_collisions = 0;
62   ht->ht_lookups = 0;
63   ht->ht_rehashes = 0;
64   ht->ht_hash_1 = hash_1;
65   ht->ht_hash_2 = hash_2;
66   ht->ht_compare = hash_cmp;
67 #ifdef CONFIG_WITH_STRCACHE2
68   ht->ht_strcache = 0;
69   ht->ht_off_string = 0;
70 #endif
71 }
72 
73 #ifdef CONFIG_WITH_STRCACHE2
74 /* Same as hash_init, except that no callbacks are needed since all
75    keys - including the ones being searched for - are from a string
76    cache.  This means that any give string will only have one pointer
77    value and that the hash and length can be retrived very cheaply,
78    thus permitting some nice optimizations.
79 
80    STRCACHE points to the string cache, while OFF_STRING gives the
81    offset of the string pointer in the item structures the hash table
82    entries points to.  */
hash_init_strcached(struct hash_table * ht,unsigned long size,struct strcache2 * strcache,unsigned int off_string)83 void hash_init_strcached (struct hash_table *ht, unsigned long size,
84                           struct strcache2 *strcache, unsigned int off_string)
85 {
86   hash_init (ht, size, 0, 0, 0);
87   ht->ht_strcache = strcache;
88   ht->ht_off_string = off_string;
89 }
90 #endif /* CONFIG_WITH_STRCACHE2 */
91 
92 /* Load an array of items into `ht'.  */
93 
94 void
hash_load(struct hash_table * ht,void * item_table,unsigned long cardinality,unsigned long size)95 hash_load (struct hash_table *ht, void *item_table,
96            unsigned long cardinality, unsigned long size)
97 {
98   char *items = (char *) item_table;
99 #ifndef CONFIG_WITH_STRCACHE2
100   while (cardinality--)
101     {
102       hash_insert (ht, items);
103       items += size;
104     }
105 #else  /* CONFIG_WITH_STRCACHE2 */
106   if (ht->ht_strcache)
107     while (cardinality--)
108       {
109         hash_insert_strcached (ht, items);
110         items += size;
111       }
112   else
113     while (cardinality--)
114       {
115         hash_insert (ht, items);
116         items += size;
117       }
118 #endif /* CONFIG_WITH_STRCACHE2 */
119 }
120 
121 /* Returns the address of the table slot matching `key'.  If `key' is
122    not found, return the address of an empty slot suitable for
123    inserting `key'.  The caller is responsible for incrementing
124    ht_fill on insertion.  */
125 
126 void **
hash_find_slot(struct hash_table * ht,const void * key)127 hash_find_slot (struct hash_table *ht, const void *key)
128 {
129   void **slot;
130   void **deleted_slot = 0;
131   unsigned int hash_2 = 0;
132   unsigned int hash_1 = (*ht->ht_hash_1) (key);
133 
134 #ifdef CONFIG_WITH_STRCACHE2
135   assert (ht->ht_strcache == 0);
136 #endif
137 
138   MAKE_STATS (ht->ht_lookups++);
139   MAKE_STATS_3 (make_stats_ht_lookups++);
140   for (;;)
141     {
142       hash_1 &= (ht->ht_size - 1);
143       slot = &ht->ht_vec[hash_1];
144 
145       if (*slot == 0)
146 	return (deleted_slot ? deleted_slot : slot);
147       if (*slot == hash_deleted_item)
148 	{
149 	  if (deleted_slot == 0)
150 	    deleted_slot = slot;
151 	}
152       else
153 	{
154 	  if (key == *slot)
155 	    return slot;
156 	  if ((*ht->ht_compare) (key, *slot) == 0)
157 	    return slot;
158 	  MAKE_STATS (ht->ht_collisions++);
159 	  MAKE_STATS_3 (make_stats_ht_collisions++);
160 	}
161       if (!hash_2)
162 	  hash_2 = (*ht->ht_hash_2) (key) | 1;
163       hash_1 += hash_2;
164     }
165 }
166 
167 #ifdef CONFIG_WITH_STRCACHE2
168 /* hash_find_slot version for tables created with hash_init_strcached.  */
169 void **
hash_find_slot_strcached(struct hash_table * ht,const void * key)170 hash_find_slot_strcached (struct hash_table *ht, const void *key)
171 {
172   void **slot;
173   void **deleted_slot = 0;
174   const char *str1 = *(const char **)((const char *)key + ht->ht_off_string);
175   const char *str2;
176   unsigned int hash_1 = strcache2_calc_ptr_hash (ht->ht_strcache, str1);
177   unsigned int hash_2;
178 
179 #ifdef CONFIG_WITH_STRCACHE2
180   assert (ht->ht_strcache != 0);
181 #endif
182 
183   MAKE_STATS (ht->ht_lookups++);
184   MAKE_STATS_3 (make_stats_ht_lookups++);
185 
186   /* first iteration unrolled. */
187 
188   hash_1 &= (ht->ht_size - 1);
189   slot = &ht->ht_vec[hash_1];
190   if (*slot == 0)
191     return slot;
192   if (*slot != hash_deleted_item)
193     {
194       str2 = *(const char **)((const char *)(*slot) + ht->ht_off_string);
195       if (str1 == str2)
196         return slot;
197 
198       MAKE_STATS (ht->ht_collisions++);
199       MAKE_STATS_3 (make_stats_ht_collisions++);
200     }
201   else
202     deleted_slot = slot;
203 
204   /* the rest of the loop. */
205 
206   hash_2 = strcache2_get_hash (ht->ht_strcache, str1) | 1;
207   hash_1 += hash_2;
208   for (;;)
209     {
210       hash_1 &= (ht->ht_size - 1);
211       slot = &ht->ht_vec[hash_1];
212 
213       if (*slot == 0)
214 	return (deleted_slot ? deleted_slot : slot);
215       if (*slot == hash_deleted_item)
216 	{
217 	  if (deleted_slot == 0)
218 	    deleted_slot = slot;
219 	}
220       else
221 	{
222           str2 = *(const char **)((const char *)(*slot) + ht->ht_off_string);
223           if (str1 == str2)
224 	    return slot;
225 
226 	  MAKE_STATS (ht->ht_collisions++);
227 	  MAKE_STATS_3 (make_stats_ht_collisions++);
228 	}
229 
230       hash_1 += hash_2;
231     }
232 }
233 #endif /* CONFIG_WITH_STRCACHE2 */
234 
235 void *
hash_find_item(struct hash_table * ht,const void * key)236 hash_find_item (struct hash_table *ht, const void *key)
237 {
238   void **slot = hash_find_slot (ht, key);
239   return ((HASH_VACANT (*slot)) ? 0 : *slot);
240 }
241 
242 #ifdef CONFIG_WITH_STRCACHE2
243 void *
hash_find_item_strcached(struct hash_table * ht,const void * key)244 hash_find_item_strcached (struct hash_table *ht, const void *key)
245 {
246   void **slot = hash_find_slot_strcached (ht, key);
247   return ((HASH_VACANT (*slot)) ? 0 : *slot);
248 }
249 #endif /* CONFIG_WITH_STRCACHE2 */
250 
251 void *
hash_insert(struct hash_table * ht,const void * item)252 hash_insert (struct hash_table *ht, const void *item)
253 {
254   void **slot = hash_find_slot (ht, item);
255   const void *old_item = *slot;
256   hash_insert_at (ht, item, slot);
257   return (void *)((HASH_VACANT (old_item)) ? 0 : old_item);
258 }
259 
260 #ifdef CONFIG_WITH_STRCACHE2
261 void *
hash_insert_strcached(struct hash_table * ht,const void * item)262 hash_insert_strcached (struct hash_table *ht, const void *item)
263 {
264   void **slot = hash_find_slot_strcached (ht, item);
265   const void *old_item = slot ? *slot : 0;
266   hash_insert_at (ht, item, slot);
267   return (void *)((HASH_VACANT (old_item)) ? 0 : old_item);
268 }
269 #endif /* CONFIG_WITH_STRCACHE2 */
270 
271 void *
hash_insert_at(struct hash_table * ht,const void * item,const void * slot)272 hash_insert_at (struct hash_table *ht, const void *item, const void *slot)
273 {
274   const void *old_item = *(void **) slot;
275   if (HASH_VACANT (old_item))
276     {
277       ht->ht_fill++;
278       if (old_item == 0)
279 	ht->ht_empty_slots--;
280       old_item = item;
281     }
282   *(void const **) slot = item;
283   if (ht->ht_empty_slots < ht->ht_size - ht->ht_capacity)
284     {
285       hash_rehash (ht);
286 #ifdef CONFIG_WITH_STRCACHE2
287       if (ht->ht_strcache)
288         return (void *)hash_find_slot_strcached (ht, item);
289 #endif /* CONFIG_WITH_STRCACHE2 */
290       return (void *) hash_find_slot (ht, item);
291     }
292   else
293     return (void *) slot;
294 }
295 
296 void *
hash_delete(struct hash_table * ht,const void * item)297 hash_delete (struct hash_table *ht, const void *item)
298 {
299   void **slot = hash_find_slot (ht, item);
300   return hash_delete_at (ht, slot);
301 }
302 
303 #ifdef CONFIG_WITH_STRCACHE2
304 void *
hash_delete_strcached(struct hash_table * ht,const void * item)305 hash_delete_strcached (struct hash_table *ht, const void *item)
306 {
307   void **slot = hash_find_slot_strcached (ht, item);
308   return hash_delete_at (ht, slot);
309 }
310 #endif /* CONFIG_WITH_STRCACHE2 */
311 
312 void *
hash_delete_at(struct hash_table * ht,const void * slot)313 hash_delete_at (struct hash_table *ht, const void *slot)
314 {
315   void *item = *(void **) slot;
316   if (!HASH_VACANT (item))
317     {
318       *(void const **) slot = hash_deleted_item;
319       ht->ht_fill--;
320       return item;
321     }
322   else
323     return 0;
324 }
325 
326 void
hash_free_items(struct hash_table * ht)327 hash_free_items (struct hash_table *ht)
328 {
329   void **vec = ht->ht_vec;
330   void **end = &vec[ht->ht_size];
331   for (; vec < end; vec++)
332     {
333       void *item = *vec;
334       if (!HASH_VACANT (item))
335 	free (item);
336       *vec = 0;
337     }
338   ht->ht_fill = 0;
339   ht->ht_empty_slots = ht->ht_size;
340 }
341 
342 #ifdef CONFIG_WITH_ALLOC_CACHES
343 void
hash_free_items_cached(struct hash_table * ht,struct alloccache * cache)344 hash_free_items_cached (struct hash_table *ht, struct alloccache *cache)
345 {
346   void **vec = ht->ht_vec;
347   void **end = &vec[ht->ht_size];
348   for (; vec < end; vec++)
349     {
350       void *item = *vec;
351       if (!HASH_VACANT (item))
352 	alloccache_free (cache, item);
353       *vec = 0;
354     }
355   ht->ht_fill = 0;
356   ht->ht_empty_slots = ht->ht_size;
357 }
358 #endif /* CONFIG_WITH_ALLOC_CACHES */
359 
360 void
hash_delete_items(struct hash_table * ht)361 hash_delete_items (struct hash_table *ht)
362 {
363   void **vec = ht->ht_vec;
364   void **end = &vec[ht->ht_size];
365   for (; vec < end; vec++)
366     *vec = 0;
367   ht->ht_fill = 0;
368   ht->ht_collisions = 0;
369   ht->ht_lookups = 0;
370   ht->ht_rehashes = 0;
371   ht->ht_empty_slots = ht->ht_size;
372 }
373 
374 void
hash_free(struct hash_table * ht,int free_items)375 hash_free (struct hash_table *ht, int free_items)
376 {
377   if (free_items)
378     hash_free_items (ht);
379   else
380     {
381       ht->ht_fill = 0;
382       ht->ht_empty_slots = ht->ht_size;
383     }
384   free (ht->ht_vec);
385   ht->ht_vec = 0;
386   ht->ht_capacity = 0;
387 }
388 
389 #ifdef CONFIG_WITH_ALLOC_CACHES
390 void
hash_free_cached(struct hash_table * ht,int free_items,struct alloccache * cache)391 hash_free_cached (struct hash_table *ht, int free_items, struct alloccache *cache)
392 {
393   if (free_items)
394     hash_free_items_cached (ht, cache);
395   else
396     {
397       ht->ht_fill = 0;
398       ht->ht_empty_slots = ht->ht_size;
399     }
400   free (ht->ht_vec);
401   ht->ht_vec = 0;
402   ht->ht_capacity = 0;
403 }
404 #endif /* CONFIG_WITH_ALLOC_CACHES */
405 
406 void
hash_map(struct hash_table * ht,hash_map_func_t map)407 hash_map (struct hash_table *ht, hash_map_func_t map)
408 {
409   void **slot;
410   void **end = &ht->ht_vec[ht->ht_size];
411 
412   for (slot = ht->ht_vec; slot < end; slot++)
413     {
414       if (!HASH_VACANT (*slot))
415 	(*map) (*slot);
416     }
417 }
418 
419 void
hash_map_arg(struct hash_table * ht,hash_map_arg_func_t map,void * arg)420 hash_map_arg (struct hash_table *ht, hash_map_arg_func_t map, void *arg)
421 {
422   void **slot;
423   void **end = &ht->ht_vec[ht->ht_size];
424 
425   for (slot = ht->ht_vec; slot < end; slot++)
426     {
427       if (!HASH_VACANT (*slot))
428 	(*map) (*slot, arg);
429     }
430 }
431 
432 /* Double the size of the hash table in the event of overflow... */
433 
434 static void
hash_rehash(struct hash_table * ht)435 hash_rehash (struct hash_table *ht)
436 {
437   unsigned long old_ht_size = ht->ht_size;
438   void **old_vec = ht->ht_vec;
439   void **ovp;
440 
441   if (ht->ht_fill >= ht->ht_capacity)
442     {
443       ht->ht_size *= 2;
444       ht->ht_capacity = ht->ht_size - (ht->ht_size >> 4);
445     }
446   ht->ht_rehashes++;
447   ht->ht_vec = (void **) CALLOC (struct token *, ht->ht_size);
448 
449 #ifndef CONFIG_WITH_STRCACHE2
450   for (ovp = old_vec; ovp < &old_vec[old_ht_size]; ovp++)
451     {
452       if (! HASH_VACANT (*ovp))
453 	{
454 	  void **slot = hash_find_slot (ht, *ovp);
455 	  *slot = *ovp;
456 	}
457     }
458 #else  /* CONFIG_WITH_STRCACHE2 */
459   if (ht->ht_strcache)
460     for (ovp = old_vec; ovp < &old_vec[old_ht_size]; ovp++)
461       {
462         if (! HASH_VACANT (*ovp))
463           {
464             void **slot = hash_find_slot_strcached (ht, *ovp);
465             *slot = *ovp;
466           }
467       }
468   else
469     for (ovp = old_vec; ovp < &old_vec[old_ht_size]; ovp++)
470       {
471         if (! HASH_VACANT (*ovp))
472           {
473             void **slot = hash_find_slot (ht, *ovp);
474             *slot = *ovp;
475           }
476       }
477 #endif /* CONFIG_WITH_STRCACHE2 */
478   ht->ht_empty_slots = ht->ht_size - ht->ht_fill;
479   free (old_vec);
480 }
481 
482 void
hash_print_stats(struct hash_table * ht,FILE * out_FILE)483 hash_print_stats (struct hash_table *ht, FILE *out_FILE)
484 {
485   /* GKM FIXME: honor NO_FLOAT */
486   fprintf (out_FILE, _("Load=%ld/%ld=%.0f%%, "), ht->ht_fill, ht->ht_size,
487 	   100.0 * (double) ht->ht_fill / (double) ht->ht_size);
488   fprintf (out_FILE, _("Rehash=%d, "), ht->ht_rehashes);
489   MAKE_STATS(
490   fprintf (out_FILE, _("Collisions=%ld/%ld=%.0f%%"), ht->ht_collisions, ht->ht_lookups,
491 	   (ht->ht_lookups
492 	    ? (100.0 * (double) ht->ht_collisions / (double) ht->ht_lookups)
493 	    : 0));
494   );
495 }
496 
497 /* Dump all items into a NULL-terminated vector.  Use the
498    user-supplied vector, or malloc one.  */
499 
500 void **
hash_dump(struct hash_table * ht,void ** vector_0,qsort_cmp_t compare)501 hash_dump (struct hash_table *ht, void **vector_0, qsort_cmp_t compare)
502 {
503   void **vector;
504   void **slot;
505   void **end = &ht->ht_vec[ht->ht_size];
506 
507   if (vector_0 == 0)
508     vector_0 = MALLOC (void *, ht->ht_fill + 1);
509   vector = vector_0;
510 
511   for (slot = ht->ht_vec; slot < end; slot++)
512     if (!HASH_VACANT (*slot))
513       *vector++ = *slot;
514   *vector = 0;
515 
516   if (compare)
517     qsort (vector_0, ht->ht_fill, sizeof (void *), compare);
518   return vector_0;
519 }
520 
521 /* Round a given number up to the nearest power of 2. */
522 
523 static unsigned long
round_up_2(unsigned long n)524 round_up_2 (unsigned long n)
525 {
526   n |= (n >> 1);
527   n |= (n >> 2);
528   n |= (n >> 4);
529   n |= (n >> 8);
530   n |= (n >> 16);
531 
532 #if !defined(HAVE_LIMITS_H) || ULONG_MAX > 4294967295
533   /* We only need this on systems where unsigned long is >32 bits.  */
534   n |= (n >> 32);
535 #endif
536 
537   return n + 1;
538 }
539