1 /* Hash tables.
2 Copyright (C) 2000-2011, 2015, 2018-2021 Free Software Foundation,
3 Inc.
4
5 This file is part of GNU Wget.
6
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
19
20 Additional permission under GNU GPL version 3 section 7
21
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
30
31 /* With -DSTANDALONE, this file can be compiled outside Wget source
32 tree. To test, also use -DTEST. */
33
34 #ifndef STANDALONE
35 # include "wget.h"
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <assert.h>
41 #include <string.h>
42 #include <limits.h>
43
44 #ifndef STANDALONE
45 /* Get Wget's utility headers. */
46 # include "utils.h"
47 #else
48 /* Make do without them. */
49 # define xnew(type) (xmalloc (sizeof (type)))
50 # define xnew0(type) (xcalloc (1, sizeof (type)))
51 # define xnew_array(type, len) (xmalloc ((len) * sizeof (type)))
52 # define xfree(p) do { free ((void *) (p)); p = NULL; } while (0)
53
54 # ifndef countof
55 # define countof(x) (sizeof (x) / sizeof ((x)[0]))
56 # endif
57 # include <ctype.h>
58 # define c_tolower(x) tolower ((unsigned char) (x))
59 # include <stdint.h>
60 #endif
61
62 #include "hash.h"
63
64 /* INTERFACE:
65
66 Hash tables are a technique used to implement mapping between
67 objects with near-constant-time access and storage. The table
68 associates keys to values, and a value can be very quickly
69 retrieved by providing the key. Fast lookup tables are typically
70 implemented as hash tables.
71
72 The entry points are
73 hash_table_new -- creates the table.
74 hash_table_destroy -- destroys the table.
75 hash_table_put -- establishes or updates key->value mapping.
76 hash_table_get -- retrieves value of key.
77 hash_table_get_pair -- get key/value pair for key.
78 hash_table_contains -- test whether the table contains key.
79 hash_table_remove -- remove key->value mapping for given key.
80 hash_table_for_each -- call function for each table entry.
81 hash_table_iterate -- iterate over entries in hash table.
82 hash_table_iter_next -- return next element during iteration.
83 hash_table_clear -- clear hash table contents.
84 hash_table_count -- return the number of entries in the table.
85
86 The hash table grows internally as new entries are added and is not
87 limited in size, except by available memory. The table doubles
88 with each resize, which ensures that the amortized time per
89 operation remains constant.
90
91 If not instructed otherwise, tables created by hash_table_new
92 consider the keys to be equal if their pointer values are the same.
93 You can use make_string_hash_table to create tables whose keys are
94 considered equal if their string contents are the same. In the
95 general case, the criterion of equality used to compare keys is
96 specified at table creation time with two callback functions,
97 "hash" and "test". The hash function transforms the key into an
98 arbitrary number that must be the same for two equal keys. The
99 test function accepts two keys and returns non-zero if they are to
100 be considered equal.
101
102 Note that neither keys nor values are copied when inserted into the
103 hash table, so they must exist for the lifetime of the table. This
104 means that e.g. the use of static strings is OK, but objects with a
105 shorter life-time probably need to be copied (with strdup() or the
106 like in the case of strings) before being inserted. */
107
108 /* IMPLEMENTATION:
109
110 The hash table is implemented as an open-addressed table with
111 linear probing collision resolution.
112
113 The above means that all the cells (each cell containing a key and
114 a value pointer) are stored in a contiguous array. Array position
115 of each cell is determined by the hash value of its key and the
116 size of the table: location := hash(key) % size. If two different
117 keys end up on the same position (collide), the one that came
118 second is stored in the first unoccupied cell that follows it.
119 This collision resolution technique is called "linear probing".
120
121 There are more advanced collision resolution methods (quadratic
122 probing, double hashing), but we don't use them because they incur
123 more non-sequential access to the array, which results in worse CPU
124 cache behavior. Linear probing works well as long as the
125 count/size ratio (fullness) is kept below 75%. We make sure to
126 grow and rehash the table whenever this threshold is exceeded.
127
128 Collisions complicate deletion because simply clearing a cell
129 followed by previously collided entries would cause those neighbors
130 to not be picked up by find_cell later. One solution is to leave a
131 "tombstone" marker instead of clearing the cell, and another is to
132 recalculate the positions of adjacent cells. We take the latter
133 approach because it results in less bookkeeping garbage and faster
134 retrieval at the (slight) expense of deletion. */
135
136 /* Maximum allowed fullness: when hash table's fullness exceeds this
137 value, the table is resized. */
138 #define HASH_MAX_FULLNESS 0.75
139
140 /* The hash table size is multiplied by this factor (and then rounded
141 to the next prime) with each resize. This guarantees infrequent
142 resizes. */
143 #define HASH_RESIZE_FACTOR 2
144
145 struct cell {
146 void *key;
147 void *value;
148 };
149
150 typedef unsigned long (*hashfun_t) (const void *);
151 typedef int (*testfun_t) (const void *, const void *);
152
153 struct hash_table {
154 hashfun_t hash_function;
155 testfun_t test_function;
156
157 struct cell *cells; /* contiguous array of cells. */
158 int size; /* size of the array. */
159
160 int count; /* number of occupied entries. */
161 int resize_threshold; /* after size exceeds this number of
162 entries, resize the table. */
163 int prime_offset; /* the offset of the current prime in
164 the prime table. */
165 };
166
167 /* We use the all-bits-set constant (INVALID_PTR) marker to mean that
168 a cell is empty. It is unaligned and therefore illegal as a
169 pointer. INVALID_PTR_CHAR (0xff) is the single-character constant
170 used to initialize the entire cells array as empty.
171
172 The all-bits-set value is a better choice than NULL because it
173 allows the use of NULL/0 keys. Since the keys are either integers
174 or pointers, the only key that cannot be used is the integer value
175 -1. This is acceptable because it still allows the use of
176 nonnegative integer keys. */
177
178 #define INVALID_PTR ((void *) ~(uintptr_t) 0)
179 #ifndef UCHAR_MAX
180 # define UCHAR_MAX 0xff
181 #endif
182 #define INVALID_PTR_CHAR UCHAR_MAX
183
184 /* Whether the cell C is occupied (non-empty). */
185 #define CELL_OCCUPIED(c) ((c)->key != INVALID_PTR)
186
187 /* Clear the cell C, i.e. mark it as empty (unoccupied). */
188 #define CLEAR_CELL(c) ((c)->key = INVALID_PTR)
189
190 /* "Next" cell is the cell following C, but wrapping back to CELLS
191 when C would reach CELLS+SIZE. */
192 #define NEXT_CELL(c, cells, size) (c != cells + (size - 1) ? c + 1 : cells)
193
194 /* Loop over occupied cells starting at C, terminating the loop when
195 an empty cell is encountered. */
196 #define FOREACH_OCCUPIED_ADJACENT(c, cells, size) \
197 for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size))
198
199 /* Return the position of KEY in hash table SIZE large, hash function
200 being HASHFUN. */
201 #define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size)
202
203 /* Find a prime near, but greater than or equal to SIZE. The primes
204 are looked up from a table with a selection of primes convenient
205 for this purpose.
206
207 PRIME_OFFSET is a minor optimization: it specifies start position
208 for the search for the large enough prime. The final offset is
209 stored in the same variable. That way the list of primes does not
210 have to be scanned from the beginning each time around. */
211
212 static int
prime_size(int size,int * prime_offset)213 prime_size (int size, int *prime_offset)
214 {
215 static const int primes[] = {
216 13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
217 1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
218 19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
219 204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
220 1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
221 10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
222 50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
223 243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
224 1174703521, 1527114613, 1837299131, 2147483647
225 };
226 size_t i;
227
228 for (i = *prime_offset; i < countof (primes); i++)
229 if (primes[i] >= size)
230 {
231 /* Set the offset to the next prime. That is safe because,
232 next time we are called, it will be with a larger SIZE,
233 which means we could never return the same prime anyway.
234 (If that is not the case, the caller can simply reset
235 *prime_offset.) */
236 *prime_offset = i + 1;
237 return primes[i];
238 }
239
240 abort ();
241 }
242
243 static int cmp_pointer (const void *, const void *);
244
245 /* Create a hash table with hash function HASH_FUNCTION and test
246 function TEST_FUNCTION. The table is empty (its count is 0), but
247 pre-allocated to store at least ITEMS items.
248
249 ITEMS is the number of items that the table can accept without
250 needing to resize. It is useful when creating a table that is to
251 be immediately filled with a known number of items. In that case,
252 the regrows are a waste of time, and specifying ITEMS correctly
253 will avoid them altogether.
254
255 Note that hash tables grow dynamically regardless of ITEMS. The
256 only use of ITEMS is to preallocate the table and avoid unnecessary
257 dynamic regrows. Don't bother making ITEMS prime because it's not
258 used as size unchanged. To start with a small table that grows as
259 needed, simply specify zero ITEMS.
260
261 If hash and test callbacks are not specified, identity mapping is
262 assumed, i.e. pointer values are used for key comparison. (Common
263 Lisp calls such tables EQ hash tables, and Java calls them
264 IdentityHashMaps.) If your keys require different comparison,
265 specify hash and test functions. For easy use of C strings as hash
266 keys, you can use the convenience functions make_string_hash_table
267 and make_nocase_string_hash_table. */
268
269 struct hash_table *
hash_table_new(int items,unsigned long (* hash_function)(const void *),int (* test_function)(const void *,const void *))270 hash_table_new (int items,
271 unsigned long (*hash_function) (const void *),
272 int (*test_function) (const void *, const void *))
273 {
274 int size;
275 struct hash_table *ht = xnew (struct hash_table);
276
277 ht->hash_function = hash_function ? hash_function : hash_pointer;
278 ht->test_function = test_function ? test_function : cmp_pointer;
279
280 /* If the size of struct hash_table ever becomes a concern, this
281 field can go. (Wget doesn't create many hashes.) */
282 ht->prime_offset = 0;
283
284 /* Calculate the size that ensures that the table will store at
285 least ITEMS keys without the need to resize. */
286 size = (int) (1 + items / HASH_MAX_FULLNESS);
287 size = prime_size (size, &ht->prime_offset);
288 ht->size = size;
289 ht->resize_threshold = (int) (size * HASH_MAX_FULLNESS);
290 /*assert (ht->resize_threshold >= items);*/
291
292 ht->cells = xnew_array (struct cell, ht->size);
293
294 /* Mark cells as empty. We use 0xff rather than 0 to mark empty
295 keys because it allows us to use NULL/0 as keys. */
296 memset (ht->cells, INVALID_PTR_CHAR, size * sizeof (struct cell));
297
298 ht->count = 0;
299
300 return ht;
301 }
302
303 /* Free the data associated with hash table HT. */
304
305 void
hash_table_destroy(struct hash_table * ht)306 hash_table_destroy (struct hash_table *ht)
307 {
308 xfree (ht->cells);
309 xfree (ht);
310 }
311
312 /* The heart of most functions in this file -- find the cell whose
313 KEY is equal to key, using linear probing. Returns the cell
314 that matches KEY, or the first empty cell if none matches. */
315
316 static inline struct cell *
find_cell(const struct hash_table * ht,const void * key)317 find_cell (const struct hash_table *ht, const void *key)
318 {
319 struct cell *cells = ht->cells;
320 int size = ht->size;
321 struct cell *c = cells + HASH_POSITION (key, ht->hash_function, size);
322 testfun_t equals = ht->test_function;
323
324 FOREACH_OCCUPIED_ADJACENT (c, cells, size)
325 if (equals (key, c->key))
326 break;
327 return c;
328 }
329
330 /* Get the value that corresponds to the key KEY in the hash table HT.
331 If no value is found, return NULL. Note that NULL is a legal value
332 for value; if you are storing NULLs in your hash table, you can use
333 hash_table_contains to be sure that a (possibly NULL) value exists
334 in the table. Or, you can use hash_table_get_pair instead of this
335 function. */
336
337 void *
hash_table_get(const struct hash_table * ht,const void * key)338 hash_table_get (const struct hash_table *ht, const void *key)
339 {
340 struct cell *c = find_cell (ht, key);
341 if (CELL_OCCUPIED (c))
342 return c->value;
343 else
344 return NULL;
345 }
346
347 /* Like hash_table_get, but writes out the pointers to both key and
348 value. Returns non-zero on success. */
349
350 int
hash_table_get_pair(const struct hash_table * ht,const void * lookup_key,void * orig_key,void * value)351 hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
352 void *orig_key, void *value)
353 {
354 struct cell *c = find_cell (ht, lookup_key);
355 if (CELL_OCCUPIED (c))
356 {
357 if (orig_key)
358 *(void **)orig_key = c->key;
359 if (value)
360 *(void **)value = c->value;
361 return 1;
362 }
363 else
364 return 0;
365 }
366
367 /* Return 1 if HT contains KEY, 0 otherwise. */
368
369 int
hash_table_contains(const struct hash_table * ht,const void * key)370 hash_table_contains (const struct hash_table *ht, const void *key)
371 {
372 struct cell *c = find_cell (ht, key);
373 return CELL_OCCUPIED (c);
374 }
375
376 /* Grow hash table HT as necessary, and rehash all the key-value
377 mappings. */
378
379 static void
grow_hash_table(struct hash_table * ht)380 grow_hash_table (struct hash_table *ht)
381 {
382 hashfun_t hasher = ht->hash_function;
383 struct cell *old_cells = ht->cells;
384 struct cell *old_end = ht->cells + ht->size;
385 struct cell *c, *cells;
386 int newsize;
387
388 newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
389 #if 0
390 printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
391 ht->size, newsize,
392 100.0 * ht->count / ht->size,
393 100.0 * ht->count / newsize);
394 #endif
395
396 ht->size = newsize;
397 ht->resize_threshold = (int) (newsize * HASH_MAX_FULLNESS);
398
399 cells = xnew_array (struct cell, newsize);
400 memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell));
401 ht->cells = cells;
402
403 for (c = old_cells; c < old_end; c++)
404 if (CELL_OCCUPIED (c))
405 {
406 struct cell *new_c;
407 /* We don't need to test for uniqueness of keys because they
408 come from the hash table and are therefore known to be
409 unique. */
410 new_c = cells + HASH_POSITION (c->key, hasher, newsize);
411 FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize)
412 ;
413 *new_c = *c;
414 }
415
416 xfree (old_cells);
417 }
418
419 /* Put VALUE in the hash table HT under the key KEY. This regrows the
420 table if necessary. */
421
422 void
hash_table_put(struct hash_table * ht,const void * key,const void * value)423 hash_table_put (struct hash_table *ht, const void *key, const void *value)
424 {
425 struct cell *c = find_cell (ht, key);
426 if (CELL_OCCUPIED (c))
427 {
428 /* update existing item */
429 c->key = (void *)key; /* const? */
430 c->value = (void *)value;
431 return;
432 }
433
434 /* If adding the item would make the table exceed max. fullness,
435 grow the table first. */
436 if (ht->count >= ht->resize_threshold)
437 {
438 grow_hash_table (ht);
439 c = find_cell (ht, key);
440 }
441
442 /* add new item */
443 ++ht->count;
444 c->key = (void *)key; /* const? */
445 c->value = (void *)value;
446 }
447
448 /* Remove KEY->value mapping from HT. Return 0 if there was no such
449 entry; return 1 if an entry was removed. */
450
451 int
hash_table_remove(struct hash_table * ht,const void * key)452 hash_table_remove (struct hash_table *ht, const void *key)
453 {
454 struct cell *c = find_cell (ht, key);
455 if (!CELL_OCCUPIED (c))
456 return 0;
457 else
458 {
459 int size = ht->size;
460 struct cell *cells = ht->cells;
461 hashfun_t hasher = ht->hash_function;
462
463 CLEAR_CELL (c);
464 --ht->count;
465
466 /* Rehash all the entries following C. The alternative
467 approach is to mark the entry as deleted, i.e. create a
468 "tombstone". That speeds up removal, but leaves a lot of
469 garbage and slows down hash_table_get and hash_table_put. */
470
471 c = NEXT_CELL (c, cells, size);
472 FOREACH_OCCUPIED_ADJACENT (c, cells, size)
473 {
474 const void *key2 = c->key;
475 struct cell *c_new;
476
477 /* Find the new location for the key. */
478 c_new = cells + HASH_POSITION (key2, hasher, size);
479 FOREACH_OCCUPIED_ADJACENT (c_new, cells, size)
480 if (key2 == c_new->key)
481 /* The cell C (key2) is already where we want it (in
482 C_NEW's "chain" of keys.) */
483 goto next_rehash;
484
485 *c_new = *c;
486 CLEAR_CELL (c);
487
488 next_rehash:
489 ;
490 }
491 return 1;
492 }
493 }
494
495 /* Clear HT of all entries. After calling this function, the count
496 and the fullness of the hash table will be zero. The size will
497 remain unchanged. */
498
499 void
hash_table_clear(struct hash_table * ht)500 hash_table_clear (struct hash_table *ht)
501 {
502 memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell));
503 ht->count = 0;
504 }
505
506 /* Call FN for each entry in HT. FN is called with three arguments:
507 the key, the value, and ARG. When FN returns a non-zero value, the
508 mapping stops.
509
510 It is undefined what happens if you add or remove entries in the
511 hash table while hash_table_for_each is running. The exception is
512 the entry you're currently mapping over; you may call
513 hash_table_put or hash_table_remove on that entry's key. That is
514 also the reason why this function cannot be implemented in terms of
515 hash_table_iterate. */
516
517 void
hash_table_for_each(struct hash_table * ht,int (* fn)(void *,void *,void *),void * arg)518 hash_table_for_each (struct hash_table *ht,
519 int (*fn) (void *, void *, void *), void *arg)
520 {
521 struct cell *c = ht->cells;
522 struct cell *end = ht->cells + ht->size;
523
524 for (; c < end; c++)
525 if (CELL_OCCUPIED (c))
526 {
527 void *key;
528 repeat:
529 key = c->key;
530 if (fn (key, c->value, arg))
531 return;
532 /* hash_table_remove might have moved the adjacent cells. */
533 if (c->key != key && CELL_OCCUPIED (c))
534 goto repeat;
535 }
536 }
537
538 /* Initiate iteration over HT. Entries are obtained with
539 hash_table_iter_next, a typical iteration loop looking like this:
540
541 hash_table_iterator iter;
542 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
543 ... do something with iter.key and iter.value ...
544
545 The iterator does not need to be deallocated after use. The hash
546 table must not be modified while being iterated over. */
547
548 void
hash_table_iterate(struct hash_table * ht,hash_table_iterator * iter)549 hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter)
550 {
551 iter->pos = ht->cells;
552 iter->end = ht->cells + ht->size;
553 }
554
555 /* Get the next hash table entry. ITER is an iterator object
556 initialized using hash_table_iterate. While there are more
557 entries, the key and value pointers are stored to ITER->key and
558 ITER->value respectively and 1 is returned. When there are no more
559 entries, 0 is returned.
560
561 If the hash table is modified between calls to this function, the
562 result is undefined. */
563
564 int
hash_table_iter_next(hash_table_iterator * iter)565 hash_table_iter_next (hash_table_iterator *iter)
566 {
567 struct cell *c = iter->pos;
568 struct cell *end = iter->end;
569 for (; c < end; c++)
570 if (CELL_OCCUPIED (c))
571 {
572 iter->key = c->key;
573 iter->value = c->value;
574 iter->pos = c + 1;
575 return 1;
576 }
577 return 0;
578 }
579
580 /* Return the number of elements in the hash table. This is not the
581 same as the physical size of the hash table, which is always
582 greater than the number of elements. */
583
584 int
hash_table_count(const struct hash_table * ht)585 hash_table_count (const struct hash_table *ht)
586 {
587 return ht->count;
588 }
589
590 /* Functions from this point onward are meant for convenience and
591 don't strictly belong to this file. However, this is as good a
592 place for them as any. */
593
594 /* Guidelines for creating custom hash and test functions:
595
596 - The test function returns non-zero for keys that are considered
597 "equal", zero otherwise.
598
599 - The hash function returns a number that represents the
600 "distinctness" of the object. In more precise terms, it means
601 that for any two objects that test "equal" under the test
602 function, the hash function MUST produce the same result.
603
604 This does not mean that all different objects must produce
605 different values (that would be "perfect" hashing), only that
606 non-distinct objects must produce the same values! For instance,
607 a hash function that returns 0 for any given object is a
608 perfectly valid (albeit extremely bad) hash function. A hash
609 function that hashes a string by adding up all its characters is
610 another example of a valid (but still quite bad) hash function.
611
612 It is not hard to make hash and test functions agree about
613 equality. For example, if the test function compares strings
614 case-insensitively, the hash function can lower-case the
615 characters when calculating the hash value. That ensures that
616 two strings differing only in case will hash the same.
617
618 - To prevent performance degradation, choose a hash function with
619 as good "spreading" as possible. A good hash function will use
620 all the bits of the input when calculating the hash, and will
621 react to even small changes in input with a completely different
622 output. But don't make the hash function itself overly slow,
623 because you'll be incurring a non-negligible overhead to all hash
624 table operations. */
625
626 /*
627 * Support for hash tables whose keys are strings.
628 *
629 */
630
631 /* Base 31 hash function. Taken from Gnome's glib, modified to use
632 standard C types.
633
634 We used to use the popular hash function from the Dragon Book, but
635 this one seems to perform much better, both by being faster and by
636 generating less collisions. */
637
638 #ifdef __clang__
639 __attribute__((no_sanitize("integer")))
640 #endif
641 static unsigned long
hash_string(const void * key)642 hash_string (const void *key)
643 {
644 const char *p = key;
645 unsigned int h = *p;
646
647 if (h)
648 for (p += 1; *p != '\0'; p++)
649 h = (h << 5) - h + *p;
650
651 return h;
652 }
653
654 /* Frontend for strcmp usable for hash tables. */
655
656 static int
cmp_string(const void * s1,const void * s2)657 cmp_string (const void *s1, const void *s2)
658 {
659 return !strcmp ((const char *)s1, (const char *)s2);
660 }
661
662 /* Return a hash table of preallocated to store at least ITEMS items
663 suitable to use strings as keys. */
664
665 struct hash_table *
make_string_hash_table(int items)666 make_string_hash_table (int items)
667 {
668 return hash_table_new (items, hash_string, cmp_string);
669 }
670
671 /*
672 * Support for hash tables whose keys are strings, but which are
673 * compared case-insensitively.
674 *
675 */
676
677 /* Like hash_string, but produce the same hash regardless of the case. */
678
679 #ifdef __clang__
680 __attribute__((no_sanitize("integer")))
681 #endif
682 static unsigned long
hash_string_nocase(const void * key)683 hash_string_nocase (const void *key)
684 {
685 const char *p = key;
686 unsigned int h = c_tolower (*p);
687
688 if (h)
689 for (p += 1; *p != '\0'; p++)
690 h = (h << 5) - h + c_tolower (*p);
691
692 return h;
693 }
694
695 /* Like string_cmp, but doing case-insensitive comparison. */
696
697 static int
string_cmp_nocase(const void * s1,const void * s2)698 string_cmp_nocase (const void *s1, const void *s2)
699 {
700 return !strcasecmp ((const char *)s1, (const char *)s2);
701 }
702
703 /* Like make_string_hash_table, but uses string_hash_nocase and
704 string_cmp_nocase. */
705
706 struct hash_table *
make_nocase_string_hash_table(int items)707 make_nocase_string_hash_table (int items)
708 {
709 return hash_table_new (items, hash_string_nocase, string_cmp_nocase);
710 }
711
712 /* Hashing of numeric values, such as pointers and integers.
713
714 This implementation is the Robert Jenkins' 32 bit Mix Function,
715 with a simple adaptation for 64-bit values. According to Jenkins
716 it should offer excellent spreading of values. Unlike the popular
717 Knuth's multiplication hash, this function doesn't need to know the
718 hash table size to work. */
719
720 #ifdef __clang__
721 __attribute__((no_sanitize("integer")))
722 #endif
723 unsigned long
hash_pointer(const void * ptr)724 hash_pointer (const void *ptr)
725 {
726 uintptr_t key = (uintptr_t) ptr;
727 key += (key << 12);
728 key ^= (key >> 22);
729 key += (key << 4);
730 key ^= (key >> 9);
731 key += (key << 10);
732 key ^= (key >> 2);
733 key += (key << 7);
734 key ^= (key >> 12);
735 #if SIZEOF_VOID_P > 4
736 key += (key << 44);
737 key ^= (key >> 54);
738 key += (key << 36);
739 key ^= (key >> 41);
740 key += (key << 42);
741 key ^= (key >> 34);
742 key += (key << 39);
743 key ^= (key >> 44);
744 #endif
745 return (unsigned long) key;
746 }
747
748 static int
cmp_pointer(const void * ptr1,const void * ptr2)749 cmp_pointer (const void *ptr1, const void *ptr2)
750 {
751 return ptr1 == ptr2;
752 }
753
754 #ifdef TEST
755
756 #include <stdio.h>
757 #include <string.h>
758
759 void
print_hash(struct hash_table * sht)760 print_hash (struct hash_table *sht)
761 {
762 hash_table_iterator iter;
763 int count = 0;
764
765 for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter);
766 ++count)
767 printf ("%s: %s\n", iter.key, iter.value);
768 assert (count == sht->count);
769 }
770
771 int
main(void)772 main (void)
773 {
774 struct hash_table *ht = make_string_hash_table (0);
775 char line[80];
776
777 #ifdef ENABLE_NLS
778 /* Set the current locale. */
779 setlocale (LC_ALL, "");
780 /* Set the text message domain. */
781 bindtextdomain ("wget", LOCALEDIR);
782 textdomain ("wget");
783 #endif /* ENABLE_NLS */
784
785 while ((fgets (line, sizeof (line), stdin)))
786 {
787 int len = strlen (line);
788 if (len <= 1)
789 continue;
790 line[--len] = '\0';
791 if (!hash_table_contains (ht, line))
792 hash_table_put (ht, strdup (line), "here I am!");
793 #if 1
794 if (len % 5 == 0)
795 {
796 char *line_copy;
797 if (hash_table_get_pair (ht, line, &line_copy, NULL))
798 {
799 hash_table_remove (ht, line);
800 xfree (line_copy);
801 }
802 }
803 #endif
804 }
805 #if 0
806 print_hash (ht);
807 #endif
808 #if 1
809 printf ("%d %d\n", ht->count, ht->size);
810 #endif
811 return 0;
812 }
813 #endif /* TEST */
814