1 /* Hash tables.
2    Copyright (C) 2000-2011, 2015, 2018-2021 Free Software Foundation,
3    Inc.
4 
5 This file is part of GNU Wget.
6 
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11 
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19 
20 Additional permission under GNU GPL version 3 section 7
21 
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work.  */
30 
31 /* With -DSTANDALONE, this file can be compiled outside Wget source
32    tree.  To test, also use -DTEST.  */
33 
34 #ifndef STANDALONE
35 # include "wget.h"
36 #endif
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <assert.h>
41 #include <string.h>
42 #include <limits.h>
43 
44 #ifndef STANDALONE
45 /* Get Wget's utility headers. */
46 # include "utils.h"
47 #else
48 /* Make do without them. */
49 # define xnew(type) (xmalloc (sizeof (type)))
50 # define xnew0(type) (xcalloc (1, sizeof (type)))
51 # define xnew_array(type, len) (xmalloc ((len) * sizeof (type)))
52 # define xfree(p) do { free ((void *) (p)); p = NULL; } while (0)
53 
54 # ifndef countof
55 #  define countof(x) (sizeof (x) / sizeof ((x)[0]))
56 # endif
57 # include <ctype.h>
58 # define c_tolower(x) tolower ((unsigned char) (x))
59 # include <stdint.h>
60 #endif
61 
62 #include "hash.h"
63 
64 /* INTERFACE:
65 
66    Hash tables are a technique used to implement mapping between
67    objects with near-constant-time access and storage.  The table
68    associates keys to values, and a value can be very quickly
69    retrieved by providing the key.  Fast lookup tables are typically
70    implemented as hash tables.
71 
72    The entry points are
73      hash_table_new       -- creates the table.
74      hash_table_destroy   -- destroys the table.
75      hash_table_put       -- establishes or updates key->value mapping.
76      hash_table_get       -- retrieves value of key.
77      hash_table_get_pair  -- get key/value pair for key.
78      hash_table_contains  -- test whether the table contains key.
79      hash_table_remove    -- remove key->value mapping for given key.
80      hash_table_for_each  -- call function for each table entry.
81      hash_table_iterate   -- iterate over entries in hash table.
82      hash_table_iter_next -- return next element during iteration.
83      hash_table_clear     -- clear hash table contents.
84      hash_table_count     -- return the number of entries in the table.
85 
86    The hash table grows internally as new entries are added and is not
87    limited in size, except by available memory.  The table doubles
88    with each resize, which ensures that the amortized time per
89    operation remains constant.
90 
91    If not instructed otherwise, tables created by hash_table_new
92    consider the keys to be equal if their pointer values are the same.
93    You can use make_string_hash_table to create tables whose keys are
94    considered equal if their string contents are the same.  In the
95    general case, the criterion of equality used to compare keys is
96    specified at table creation time with two callback functions,
97    "hash" and "test".  The hash function transforms the key into an
98    arbitrary number that must be the same for two equal keys.  The
99    test function accepts two keys and returns non-zero if they are to
100    be considered equal.
101 
102    Note that neither keys nor values are copied when inserted into the
103    hash table, so they must exist for the lifetime of the table.  This
104    means that e.g. the use of static strings is OK, but objects with a
105    shorter life-time probably need to be copied (with strdup() or the
106    like in the case of strings) before being inserted.  */
107 
108 /* IMPLEMENTATION:
109 
110    The hash table is implemented as an open-addressed table with
111    linear probing collision resolution.
112 
113    The above means that all the cells (each cell containing a key and
114    a value pointer) are stored in a contiguous array.  Array position
115    of each cell is determined by the hash value of its key and the
116    size of the table: location := hash(key) % size.  If two different
117    keys end up on the same position (collide), the one that came
118    second is stored in the first unoccupied cell that follows it.
119    This collision resolution technique is called "linear probing".
120 
121    There are more advanced collision resolution methods (quadratic
122    probing, double hashing), but we don't use them because they incur
123    more non-sequential access to the array, which results in worse CPU
124    cache behavior.  Linear probing works well as long as the
125    count/size ratio (fullness) is kept below 75%.  We make sure to
126    grow and rehash the table whenever this threshold is exceeded.
127 
128    Collisions complicate deletion because simply clearing a cell
129    followed by previously collided entries would cause those neighbors
130    to not be picked up by find_cell later.  One solution is to leave a
131    "tombstone" marker instead of clearing the cell, and another is to
132    recalculate the positions of adjacent cells.  We take the latter
133    approach because it results in less bookkeeping garbage and faster
134    retrieval at the (slight) expense of deletion.  */
135 
136 /* Maximum allowed fullness: when hash table's fullness exceeds this
137    value, the table is resized.  */
138 #define HASH_MAX_FULLNESS 0.75
139 
140 /* The hash table size is multiplied by this factor (and then rounded
141    to the next prime) with each resize.  This guarantees infrequent
142    resizes.  */
143 #define HASH_RESIZE_FACTOR 2
144 
145 struct cell {
146   void *key;
147   void *value;
148 };
149 
150 typedef unsigned long (*hashfun_t) (const void *);
151 typedef int (*testfun_t) (const void *, const void *);
152 
153 struct hash_table {
154   hashfun_t hash_function;
155   testfun_t test_function;
156 
157   struct cell *cells;           /* contiguous array of cells. */
158   int size;                     /* size of the array. */
159 
160   int count;                    /* number of occupied entries. */
161   int resize_threshold;         /* after size exceeds this number of
162                                    entries, resize the table.  */
163   int prime_offset;             /* the offset of the current prime in
164                                    the prime table. */
165 };
166 
167 /* We use the all-bits-set constant (INVALID_PTR) marker to mean that
168    a cell is empty.  It is unaligned and therefore illegal as a
169    pointer.  INVALID_PTR_CHAR (0xff) is the single-character constant
170    used to initialize the entire cells array as empty.
171 
172    The all-bits-set value is a better choice than NULL because it
173    allows the use of NULL/0 keys.  Since the keys are either integers
174    or pointers, the only key that cannot be used is the integer value
175    -1.  This is acceptable because it still allows the use of
176    nonnegative integer keys.  */
177 
178 #define INVALID_PTR ((void *) ~(uintptr_t) 0)
179 #ifndef UCHAR_MAX
180 # define UCHAR_MAX 0xff
181 #endif
182 #define INVALID_PTR_CHAR UCHAR_MAX
183 
184 /* Whether the cell C is occupied (non-empty). */
185 #define CELL_OCCUPIED(c) ((c)->key != INVALID_PTR)
186 
187 /* Clear the cell C, i.e. mark it as empty (unoccupied). */
188 #define CLEAR_CELL(c) ((c)->key = INVALID_PTR)
189 
190 /* "Next" cell is the cell following C, but wrapping back to CELLS
191    when C would reach CELLS+SIZE.  */
192 #define NEXT_CELL(c, cells, size) (c != cells + (size - 1) ? c + 1 : cells)
193 
194 /* Loop over occupied cells starting at C, terminating the loop when
195    an empty cell is encountered.  */
196 #define FOREACH_OCCUPIED_ADJACENT(c, cells, size)                               \
197   for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
198 
199 /* Return the position of KEY in hash table SIZE large, hash function
200    being HASHFUN.  */
201 #define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
202 
203 /* Find a prime near, but greater than or equal to SIZE.  The primes
204    are looked up from a table with a selection of primes convenient
205    for this purpose.
206 
207    PRIME_OFFSET is a minor optimization: it specifies start position
208    for the search for the large enough prime.  The final offset is
209    stored in the same variable.  That way the list of primes does not
210    have to be scanned from the beginning each time around.  */
211 
212 static int
prime_size(int size,int * prime_offset)213 prime_size (int size, int *prime_offset)
214 {
215   static const int primes[] = {
216     13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
217     1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
218     19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
219     204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
220     1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
221     10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
222     50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
223     243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
224     1174703521, 1527114613, 1837299131, 2147483647
225   };
226   size_t i;
227 
228   for (i = *prime_offset; i < countof (primes); i++)
229     if (primes[i] >= size)
230       {
231         /* Set the offset to the next prime.  That is safe because,
232            next time we are called, it will be with a larger SIZE,
233            which means we could never return the same prime anyway.
234            (If that is not the case, the caller can simply reset
235            *prime_offset.)  */
236         *prime_offset = i + 1;
237         return primes[i];
238       }
239 
240   abort ();
241 }
242 
243 static int cmp_pointer (const void *, const void *);
244 
245 /* Create a hash table with hash function HASH_FUNCTION and test
246    function TEST_FUNCTION.  The table is empty (its count is 0), but
247    pre-allocated to store at least ITEMS items.
248 
249    ITEMS is the number of items that the table can accept without
250    needing to resize.  It is useful when creating a table that is to
251    be immediately filled with a known number of items.  In that case,
252    the regrows are a waste of time, and specifying ITEMS correctly
253    will avoid them altogether.
254 
255    Note that hash tables grow dynamically regardless of ITEMS.  The
256    only use of ITEMS is to preallocate the table and avoid unnecessary
257    dynamic regrows.  Don't bother making ITEMS prime because it's not
258    used as size unchanged.  To start with a small table that grows as
259    needed, simply specify zero ITEMS.
260 
261    If hash and test callbacks are not specified, identity mapping is
262    assumed, i.e. pointer values are used for key comparison.  (Common
263    Lisp calls such tables EQ hash tables, and Java calls them
264    IdentityHashMaps.)  If your keys require different comparison,
265    specify hash and test functions.  For easy use of C strings as hash
266    keys, you can use the convenience functions make_string_hash_table
267    and make_nocase_string_hash_table.  */
268 
269 struct hash_table *
hash_table_new(int items,unsigned long (* hash_function)(const void *),int (* test_function)(const void *,const void *))270 hash_table_new (int items,
271                 unsigned long (*hash_function) (const void *),
272                 int (*test_function) (const void *, const void *))
273 {
274   int size;
275   struct hash_table *ht = xnew (struct hash_table);
276 
277   ht->hash_function = hash_function ? hash_function : hash_pointer;
278   ht->test_function = test_function ? test_function : cmp_pointer;
279 
280   /* If the size of struct hash_table ever becomes a concern, this
281      field can go.  (Wget doesn't create many hashes.)  */
282   ht->prime_offset = 0;
283 
284   /* Calculate the size that ensures that the table will store at
285      least ITEMS keys without the need to resize.  */
286   size = (int) (1 + items / HASH_MAX_FULLNESS);
287   size = prime_size (size, &ht->prime_offset);
288   ht->size = size;
289   ht->resize_threshold = (int) (size * HASH_MAX_FULLNESS);
290   /*assert (ht->resize_threshold >= items);*/
291 
292   ht->cells = xnew_array (struct cell, ht->size);
293 
294   /* Mark cells as empty.  We use 0xff rather than 0 to mark empty
295      keys because it allows us to use NULL/0 as keys.  */
296   memset (ht->cells, INVALID_PTR_CHAR, size * sizeof (struct cell));
297 
298   ht->count = 0;
299 
300   return ht;
301 }
302 
303 /* Free the data associated with hash table HT. */
304 
305 void
hash_table_destroy(struct hash_table * ht)306 hash_table_destroy (struct hash_table *ht)
307 {
308   xfree (ht->cells);
309   xfree (ht);
310 }
311 
312 /* The heart of most functions in this file -- find the cell whose
313    KEY is equal to key, using linear probing.  Returns the cell
314    that matches KEY, or the first empty cell if none matches.  */
315 
316 static inline struct cell *
find_cell(const struct hash_table * ht,const void * key)317 find_cell (const struct hash_table *ht, const void *key)
318 {
319   struct cell *cells = ht->cells;
320   int size = ht->size;
321   struct cell *c = cells + HASH_POSITION (key, ht->hash_function, size);
322   testfun_t equals = ht->test_function;
323 
324   FOREACH_OCCUPIED_ADJACENT (c, cells, size)
325     if (equals (key, c->key))
326       break;
327   return c;
328 }
329 
330 /* Get the value that corresponds to the key KEY in the hash table HT.
331    If no value is found, return NULL.  Note that NULL is a legal value
332    for value; if you are storing NULLs in your hash table, you can use
333    hash_table_contains to be sure that a (possibly NULL) value exists
334    in the table.  Or, you can use hash_table_get_pair instead of this
335    function.  */
336 
337 void *
hash_table_get(const struct hash_table * ht,const void * key)338 hash_table_get (const struct hash_table *ht, const void *key)
339 {
340   struct cell *c = find_cell (ht, key);
341   if (CELL_OCCUPIED (c))
342     return c->value;
343   else
344     return NULL;
345 }
346 
347 /* Like hash_table_get, but writes out the pointers to both key and
348    value.  Returns non-zero on success.  */
349 
350 int
hash_table_get_pair(const struct hash_table * ht,const void * lookup_key,void * orig_key,void * value)351 hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
352                      void *orig_key, void *value)
353 {
354   struct cell *c = find_cell (ht, lookup_key);
355   if (CELL_OCCUPIED (c))
356     {
357       if (orig_key)
358         *(void **)orig_key = c->key;
359       if (value)
360         *(void **)value = c->value;
361       return 1;
362     }
363   else
364     return 0;
365 }
366 
367 /* Return 1 if HT contains KEY, 0 otherwise. */
368 
369 int
hash_table_contains(const struct hash_table * ht,const void * key)370 hash_table_contains (const struct hash_table *ht, const void *key)
371 {
372   struct cell *c = find_cell (ht, key);
373   return CELL_OCCUPIED (c);
374 }
375 
376 /* Grow hash table HT as necessary, and rehash all the key-value
377    mappings.  */
378 
379 static void
grow_hash_table(struct hash_table * ht)380 grow_hash_table (struct hash_table *ht)
381 {
382   hashfun_t hasher = ht->hash_function;
383   struct cell *old_cells = ht->cells;
384   struct cell *old_end   = ht->cells + ht->size;
385   struct cell *c, *cells;
386   int newsize;
387 
388   newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
389 #if 0
390   printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
391           ht->size, newsize,
392           100.0 * ht->count / ht->size,
393           100.0 * ht->count / newsize);
394 #endif
395 
396   ht->size = newsize;
397   ht->resize_threshold = (int) (newsize * HASH_MAX_FULLNESS);
398 
399   cells = xnew_array (struct cell, newsize);
400   memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell));
401   ht->cells = cells;
402 
403   for (c = old_cells; c < old_end; c++)
404     if (CELL_OCCUPIED (c))
405       {
406         struct cell *new_c;
407         /* We don't need to test for uniqueness of keys because they
408            come from the hash table and are therefore known to be
409            unique.  */
410         new_c = cells + HASH_POSITION (c->key, hasher, newsize);
411         FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
412           ;
413         *new_c = *c;
414       }
415 
416   xfree (old_cells);
417 }
418 
419 /* Put VALUE in the hash table HT under the key KEY.  This regrows the
420    table if necessary.  */
421 
422 void
hash_table_put(struct hash_table * ht,const void * key,const void * value)423 hash_table_put (struct hash_table *ht, const void *key, const void *value)
424 {
425   struct cell *c = find_cell (ht, key);
426   if (CELL_OCCUPIED (c))
427     {
428       /* update existing item */
429       c->key   = (void *)key; /* const? */
430       c->value = (void *)value;
431       return;
432     }
433 
434   /* If adding the item would make the table exceed max. fullness,
435      grow the table first.  */
436   if (ht->count >= ht->resize_threshold)
437     {
438       grow_hash_table (ht);
439       c = find_cell (ht, key);
440     }
441 
442   /* add new item */
443   ++ht->count;
444   c->key   = (void *)key;       /* const? */
445   c->value = (void *)value;
446 }
447 
448 /* Remove KEY->value mapping from HT.  Return 0 if there was no such
449    entry; return 1 if an entry was removed.  */
450 
451 int
hash_table_remove(struct hash_table * ht,const void * key)452 hash_table_remove (struct hash_table *ht, const void *key)
453 {
454   struct cell *c = find_cell (ht, key);
455   if (!CELL_OCCUPIED (c))
456     return 0;
457   else
458     {
459       int size = ht->size;
460       struct cell *cells = ht->cells;
461       hashfun_t hasher = ht->hash_function;
462 
463       CLEAR_CELL (c);
464       --ht->count;
465 
466       /* Rehash all the entries following C.  The alternative
467          approach is to mark the entry as deleted, i.e. create a
468          "tombstone".  That speeds up removal, but leaves a lot of
469          garbage and slows down hash_table_get and hash_table_put.  */
470 
471       c = NEXT_CELL (c, cells, size);
472       FOREACH_OCCUPIED_ADJACENT (c, cells, size)
473         {
474           const void *key2 = c->key;
475           struct cell *c_new;
476 
477           /* Find the new location for the key. */
478           c_new = cells + HASH_POSITION (key2, hasher, size);
479           FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
480             if (key2 == c_new->key)
481               /* The cell C (key2) is already where we want it (in
482                  C_NEW's "chain" of keys.)  */
483               goto next_rehash;
484 
485           *c_new = *c;
486           CLEAR_CELL (c);
487 
488         next_rehash:
489           ;
490         }
491       return 1;
492     }
493 }
494 
495 /* Clear HT of all entries.  After calling this function, the count
496    and the fullness of the hash table will be zero.  The size will
497    remain unchanged.  */
498 
499 void
hash_table_clear(struct hash_table * ht)500 hash_table_clear (struct hash_table *ht)
501 {
502   memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell));
503   ht->count = 0;
504 }
505 
506 /* Call FN for each entry in HT.  FN is called with three arguments:
507    the key, the value, and ARG.  When FN returns a non-zero value, the
508    mapping stops.
509 
510    It is undefined what happens if you add or remove entries in the
511    hash table while hash_table_for_each is running.  The exception is
512    the entry you're currently mapping over; you may call
513    hash_table_put or hash_table_remove on that entry's key.  That is
514    also the reason why this function cannot be implemented in terms of
515    hash_table_iterate.  */
516 
517 void
hash_table_for_each(struct hash_table * ht,int (* fn)(void *,void *,void *),void * arg)518 hash_table_for_each (struct hash_table *ht,
519                      int (*fn) (void *, void *, void *), void *arg)
520 {
521   struct cell *c = ht->cells;
522   struct cell *end = ht->cells + ht->size;
523 
524   for (; c < end; c++)
525     if (CELL_OCCUPIED (c))
526       {
527         void *key;
528       repeat:
529         key = c->key;
530         if (fn (key, c->value, arg))
531           return;
532         /* hash_table_remove might have moved the adjacent cells. */
533         if (c->key != key && CELL_OCCUPIED (c))
534           goto repeat;
535       }
536 }
537 
538 /* Initiate iteration over HT.  Entries are obtained with
539    hash_table_iter_next, a typical iteration loop looking like this:
540 
541        hash_table_iterator iter;
542        for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
543          ... do something with iter.key and iter.value ...
544 
545    The iterator does not need to be deallocated after use.  The hash
546    table must not be modified while being iterated over.  */
547 
548 void
hash_table_iterate(struct hash_table * ht,hash_table_iterator * iter)549 hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter)
550 {
551   iter->pos = ht->cells;
552   iter->end = ht->cells + ht->size;
553 }
554 
555 /* Get the next hash table entry.  ITER is an iterator object
556    initialized using hash_table_iterate.  While there are more
557    entries, the key and value pointers are stored to ITER->key and
558    ITER->value respectively and 1 is returned.  When there are no more
559    entries, 0 is returned.
560 
561    If the hash table is modified between calls to this function, the
562    result is undefined.  */
563 
564 int
hash_table_iter_next(hash_table_iterator * iter)565 hash_table_iter_next (hash_table_iterator *iter)
566 {
567   struct cell *c = iter->pos;
568   struct cell *end = iter->end;
569   for (; c < end; c++)
570     if (CELL_OCCUPIED (c))
571       {
572         iter->key = c->key;
573         iter->value = c->value;
574         iter->pos = c + 1;
575         return 1;
576       }
577   return 0;
578 }
579 
580 /* Return the number of elements in the hash table.  This is not the
581    same as the physical size of the hash table, which is always
582    greater than the number of elements.  */
583 
584 int
hash_table_count(const struct hash_table * ht)585 hash_table_count (const struct hash_table *ht)
586 {
587   return ht->count;
588 }
589 
590 /* Functions from this point onward are meant for convenience and
591    don't strictly belong to this file.  However, this is as good a
592    place for them as any.  */
593 
594 /* Guidelines for creating custom hash and test functions:
595 
596    - The test function returns non-zero for keys that are considered
597      "equal", zero otherwise.
598 
599    - The hash function returns a number that represents the
600      "distinctness" of the object.  In more precise terms, it means
601      that for any two objects that test "equal" under the test
602      function, the hash function MUST produce the same result.
603 
604      This does not mean that all different objects must produce
605      different values (that would be "perfect" hashing), only that
606      non-distinct objects must produce the same values!  For instance,
607      a hash function that returns 0 for any given object is a
608      perfectly valid (albeit extremely bad) hash function.  A hash
609      function that hashes a string by adding up all its characters is
610      another example of a valid (but still quite bad) hash function.
611 
612      It is not hard to make hash and test functions agree about
613      equality.  For example, if the test function compares strings
614      case-insensitively, the hash function can lower-case the
615      characters when calculating the hash value.  That ensures that
616      two strings differing only in case will hash the same.
617 
618    - To prevent performance degradation, choose a hash function with
619      as good "spreading" as possible.  A good hash function will use
620      all the bits of the input when calculating the hash, and will
621      react to even small changes in input with a completely different
622      output.  But don't make the hash function itself overly slow,
623      because you'll be incurring a non-negligible overhead to all hash
624      table operations.  */
625 
626 /*
627  * Support for hash tables whose keys are strings.
628  *
629  */
630 
631 /* Base 31 hash function.  Taken from Gnome's glib, modified to use
632    standard C types.
633 
634    We used to use the popular hash function from the Dragon Book, but
635    this one seems to perform much better, both by being faster and by
636    generating less collisions.  */
637 
638 #ifdef __clang__
639 __attribute__((no_sanitize("integer")))
640 #endif
641 static unsigned long
hash_string(const void * key)642 hash_string (const void *key)
643 {
644   const char *p = key;
645   unsigned int h = *p;
646 
647   if (h)
648     for (p += 1; *p != '\0'; p++)
649       h = (h << 5) - h + *p;
650 
651   return h;
652 }
653 
654 /* Frontend for strcmp usable for hash tables. */
655 
656 static int
cmp_string(const void * s1,const void * s2)657 cmp_string (const void *s1, const void *s2)
658 {
659   return !strcmp ((const char *)s1, (const char *)s2);
660 }
661 
662 /* Return a hash table of preallocated to store at least ITEMS items
663    suitable to use strings as keys.  */
664 
665 struct hash_table *
make_string_hash_table(int items)666 make_string_hash_table (int items)
667 {
668   return hash_table_new (items, hash_string, cmp_string);
669 }
670 
671 /*
672  * Support for hash tables whose keys are strings, but which are
673  * compared case-insensitively.
674  *
675  */
676 
677 /* Like hash_string, but produce the same hash regardless of the case. */
678 
679 #ifdef __clang__
680 __attribute__((no_sanitize("integer")))
681 #endif
682 static unsigned long
hash_string_nocase(const void * key)683 hash_string_nocase (const void *key)
684 {
685   const char *p = key;
686   unsigned int h = c_tolower (*p);
687 
688   if (h)
689     for (p += 1; *p != '\0'; p++)
690       h = (h << 5) - h + c_tolower (*p);
691 
692   return h;
693 }
694 
695 /* Like string_cmp, but doing case-insensitive comparison. */
696 
697 static int
string_cmp_nocase(const void * s1,const void * s2)698 string_cmp_nocase (const void *s1, const void *s2)
699 {
700   return !strcasecmp ((const char *)s1, (const char *)s2);
701 }
702 
703 /* Like make_string_hash_table, but uses string_hash_nocase and
704    string_cmp_nocase.  */
705 
706 struct hash_table *
make_nocase_string_hash_table(int items)707 make_nocase_string_hash_table (int items)
708 {
709   return hash_table_new (items, hash_string_nocase, string_cmp_nocase);
710 }
711 
712 /* Hashing of numeric values, such as pointers and integers.
713 
714    This implementation is the Robert Jenkins' 32 bit Mix Function,
715    with a simple adaptation for 64-bit values.  According to Jenkins
716    it should offer excellent spreading of values.  Unlike the popular
717    Knuth's multiplication hash, this function doesn't need to know the
718    hash table size to work.  */
719 
720 #ifdef __clang__
721 __attribute__((no_sanitize("integer")))
722 #endif
723 unsigned long
hash_pointer(const void * ptr)724 hash_pointer (const void *ptr)
725 {
726   uintptr_t key = (uintptr_t) ptr;
727   key += (key << 12);
728   key ^= (key >> 22);
729   key += (key << 4);
730   key ^= (key >> 9);
731   key += (key << 10);
732   key ^= (key >> 2);
733   key += (key << 7);
734   key ^= (key >> 12);
735 #if SIZEOF_VOID_P > 4
736   key += (key << 44);
737   key ^= (key >> 54);
738   key += (key << 36);
739   key ^= (key >> 41);
740   key += (key << 42);
741   key ^= (key >> 34);
742   key += (key << 39);
743   key ^= (key >> 44);
744 #endif
745   return (unsigned long) key;
746 }
747 
748 static int
cmp_pointer(const void * ptr1,const void * ptr2)749 cmp_pointer (const void *ptr1, const void *ptr2)
750 {
751   return ptr1 == ptr2;
752 }
753 
754 #ifdef TEST
755 
756 #include <stdio.h>
757 #include <string.h>
758 
759 void
print_hash(struct hash_table * sht)760 print_hash (struct hash_table *sht)
761 {
762   hash_table_iterator iter;
763   int count = 0;
764 
765   for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter);
766        ++count)
767     printf ("%s: %s\n", iter.key, iter.value);
768   assert (count == sht->count);
769 }
770 
771 int
main(void)772 main (void)
773 {
774   struct hash_table *ht = make_string_hash_table (0);
775   char line[80];
776 
777 #ifdef ENABLE_NLS
778   /* Set the current locale.  */
779   setlocale (LC_ALL, "");
780   /* Set the text message domain.  */
781   bindtextdomain ("wget", LOCALEDIR);
782   textdomain ("wget");
783 #endif /* ENABLE_NLS */
784 
785   while ((fgets (line, sizeof (line), stdin)))
786     {
787       int len = strlen (line);
788       if (len <= 1)
789         continue;
790       line[--len] = '\0';
791       if (!hash_table_contains (ht, line))
792         hash_table_put (ht, strdup (line), "here I am!");
793 #if 1
794       if (len % 5 == 0)
795         {
796           char *line_copy;
797           if (hash_table_get_pair (ht, line, &line_copy, NULL))
798             {
799               hash_table_remove (ht, line);
800               xfree (line_copy);
801             }
802         }
803 #endif
804     }
805 #if 0
806   print_hash (ht);
807 #endif
808 #if 1
809   printf ("%d %d\n", ht->count, ht->size);
810 #endif
811   return 0;
812 }
813 #endif /* TEST */
814