1 /*
2  * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 00    Hash Tables */
10 
11 #include "squid.h"
12 #include "hash.h"
13 #include "profiler/Profiler.h"
14 
15 #include <cassert>
16 #include <cmath>
17 #include <cstdlib>
18 #include <cstring>
19 #if HAVE_UNISTD_H
20 #include <unistd.h>
21 #endif
22 #if HAVE_GNUMALLLOC_H
23 #include <gnumalloc.h>
24 #elif HAVE_MALLOC_H
25 #include <malloc.h>
26 #endif
27 
28 static void hash_next_bucket(hash_table * hid);
29 
30 unsigned int
hash_string(const void * data,unsigned int size)31 hash_string(const void *data, unsigned int size)
32 {
33     const unsigned char *s = static_cast<const unsigned char *>(data);
34     unsigned int n = 0;
35     unsigned int j = 0;
36     unsigned int i = 0;
37     while (*s) {
38         ++j;
39         n ^= 271 * *s;
40         ++s;
41     }
42     i = n ^ (j * 271);
43     return i % size;
44 }
45 
46 /* the following function(s) were adapted from
47  *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */
48 
49 /* Hash function from Chris Torek. */
50 unsigned int
hash4(const void * data,unsigned int size)51 hash4(const void *data, unsigned int size)
52 {
53     const char *key = static_cast<const char *>(data);
54     size_t loop;
55     unsigned int h;
56     size_t len;
57 
58 #define HASH4a   h = (h << 5) - h + *key++;
59 #define HASH4b   h = (h << 5) + h + *key++;
60 #define HASH4 HASH4b
61 
62     h = 0;
63     len = strlen(key);
64     loop = len >> 3;
65     switch (len & (8 - 1)) {
66     case 0:
67         break;
68     case 7:
69         HASH4;
70     /* FALLTHROUGH */
71     case 6:
72         HASH4;
73     /* FALLTHROUGH */
74     case 5:
75         HASH4;
76     /* FALLTHROUGH */
77     case 4:
78         HASH4;
79     /* FALLTHROUGH */
80     case 3:
81         HASH4;
82     /* FALLTHROUGH */
83     case 2:
84         HASH4;
85     /* FALLTHROUGH */
86     case 1:
87         HASH4;
88     }
89     while (loop) {
90         --loop;
91         HASH4;
92         HASH4;
93         HASH4;
94         HASH4;
95         HASH4;
96         HASH4;
97         HASH4;
98         HASH4;
99     }
100     return h % size;
101 }
102 
103 /**
104  *  hash_create - creates a new hash table, uses the cmp_func
105  *  to compare keys.  Returns the identification for the hash table;
106  *  otherwise returns a negative number on error.
107  */
108 hash_table *
hash_create(HASHCMP * cmp_func,int hash_sz,HASHHASH * hash_func)109 hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
110 {
111     hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table));
112     if (!hash_sz)
113         hid->size = (unsigned int) DEFAULT_HASH_SIZE;
114     else
115         hid->size = (unsigned int) hash_sz;
116     /* allocate and null the buckets */
117     hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *));
118     hid->cmp = cmp_func;
119     hid->hash = hash_func;
120     hid->next = NULL;
121     hid->current_slot = 0;
122     return hid;
123 }
124 
125 /**
126  *  hash_join - joins a hash_link under its key lnk->key
127  *  into the hash table 'hid'.
128  *
129  *  It does not copy any data into the hash table, only links pointers.
130  */
131 void
hash_join(hash_table * hid,hash_link * lnk)132 hash_join(hash_table * hid, hash_link * lnk)
133 {
134     int i;
135     i = hid->hash(lnk->key, hid->size);
136     lnk->next = hid->buckets[i];
137     hid->buckets[i] = lnk;
138     ++hid->count;
139 }
140 
141 /**
142  *  hash_lookup - locates the item under the key 'k' in the hash table
143  *  'hid'.  Returns a pointer to the hash bucket on success; otherwise
144  *  returns NULL.
145  */
146 hash_link *
hash_lookup(hash_table * hid,const void * k)147 hash_lookup(hash_table * hid, const void *k)
148 {
149     int b;
150     PROF_start(hash_lookup);
151     assert(k != NULL);
152     b = hid->hash(k, hid->size);
153     for (hash_link *walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
154         if ((hid->cmp) (k, walker->key) == 0) {
155             PROF_stop(hash_lookup);
156             return (walker);
157         }
158         assert(walker != walker->next);
159     }
160     PROF_stop(hash_lookup);
161     return NULL;
162 }
163 
164 static void
hash_next_bucket(hash_table * hid)165 hash_next_bucket(hash_table * hid)
166 {
167     while (hid->next == NULL && ++hid->current_slot < hid->size)
168         hid->next = hid->buckets[hid->current_slot];
169 }
170 
171 /**
172  *  hash_first - initializes the hash table for the hash_next()
173  *  function.
174  */
175 void
hash_first(hash_table * hid)176 hash_first(hash_table * hid)
177 {
178     assert(NULL == hid->next);
179     hid->current_slot = 0;
180     hid->next = hid->buckets[hid->current_slot];
181     if (NULL == hid->next)
182         hash_next_bucket(hid);
183 }
184 
185 /**
186  *  hash_next - returns the next item in the hash table 'hid'.
187  *  Otherwise, returns NULL on error or end of list.
188  *
189  *  MUST call hash_first() before hash_next().
190  */
191 hash_link *
hash_next(hash_table * hid)192 hash_next(hash_table * hid)
193 {
194     hash_link *p = hid->next;
195     if (NULL == p)
196         return NULL;
197     hid->next = p->next;
198     if (NULL == hid->next)
199         hash_next_bucket(hid);
200     return p;
201 }
202 
203 /**
204  *  hash_last - resets hash traversal state to NULL
205  *
206  */
207 void
hash_last(hash_table * hid)208 hash_last(hash_table * hid)
209 {
210     assert(hid != NULL);
211     hid->next = NULL;
212     hid->current_slot = 0;
213 }
214 
215 /**
216  *  hash_remove_link - deletes the given hash_link node from the
217  *  hash table 'hid'.  Does not free the item, only removes it
218  *  from the list.
219  *
220  *  An assertion is triggered if the hash_link is not found in the
221  *  list.
222  */
223 void
hash_remove_link(hash_table * hid,hash_link * hl)224 hash_remove_link(hash_table * hid, hash_link * hl)
225 {
226     assert(hl != NULL);
227     int i = hid->hash(hl->key, hid->size);
228     for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) {
229         if (*P != hl)
230             continue;
231         *P = hl->next;
232         if (hid->next == hl) {
233             hid->next = hl->next;
234             if (NULL == hid->next)
235                 hash_next_bucket(hid);
236         }
237         --hid->count;
238         return;
239     }
240     assert(0);
241 }
242 
243 /**
244  *  hash_get_bucket - returns the head item of the bucket
245  *  in the hash table 'hid'. Otherwise, returns NULL on error.
246  */
247 hash_link *
hash_get_bucket(hash_table * hid,unsigned int bucket)248 hash_get_bucket(hash_table * hid, unsigned int bucket)
249 {
250     if (bucket >= hid->size)
251         return NULL;
252     return (hid->buckets[bucket]);
253 }
254 
255 void
hashFreeItems(hash_table * hid,HASHFREE * free_func)256 hashFreeItems(hash_table * hid, HASHFREE * free_func)
257 {
258     hash_link *l;
259     int i = 0;
260     hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
261     hash_first(hid);
262     while ((l = hash_next(hid)) && i < hid->count) {
263         *(list + i) = l;
264         ++i;
265     }
266     for (int j = 0; j < i; ++j)
267         free_func(*(list + j));
268     xfree(list);
269 }
270 
271 void
hashFreeMemory(hash_table * hid)272 hashFreeMemory(hash_table * hid)
273 {
274     if (hid == NULL)
275         return;
276     if (hid->buckets)
277         xfree(hid->buckets);
278     xfree(hid);
279 }
280 
281 static int hash_primes[] = {
282     103,
283     229,
284     467,
285     977,
286     1979,
287     4019,
288     6037,
289     7951,
290     12149,
291     16231,
292     33493,
293     65357
294 };
295 
296 int
hashPrime(int n)297 hashPrime(int n)
298 {
299     int I = sizeof(hash_primes) / sizeof(int);
300     int best_prime = hash_primes[0];
301     double min = fabs(log((double) n) - log((double) hash_primes[0]));
302     double d;
303     for (int i = 0; i < I; ++i) {
304         d = fabs(log((double) n) - log((double) hash_primes[i]));
305         if (d > min)
306             continue;
307         min = d;
308         best_prime = hash_primes[i];
309     }
310     return best_prime;
311 }
312 
313 /**
314  * return the key of a hash_link as a const string
315  */
316 const char *
hashKeyStr(const hash_link * hl)317 hashKeyStr(const hash_link * hl)
318 {
319     return (const char *) hl->key;
320 }
321 
322 #if USE_HASH_DRIVER
323 /**
324  *  hash-driver - Run with a big file as stdin to insert each line into the
325  *  hash table, then prints the whole hash table, then deletes a random item,
326  *  and prints the table again...
327  */
328 int
main(void)329 main(void)
330 {
331     hash_table *hid;
332     LOCAL_ARRAY(char, buf, BUFSIZ);
333     LOCAL_ARRAY(char, todelete, BUFSIZ);
334     hash_link *walker = NULL;
335 
336     todelete[0] = '\0';
337     printf("init\n");
338 
339     printf("creating hash table\n");
340     if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) < 0) {
341         printf("hash_create error.\n");
342         exit(1);
343     }
344     printf("done creating hash table: %d\n", hid);
345 
346     std::mt19937 mt;
347     xuniform_int_distribution<> dist(0,16);
348 
349     while (fgets(buf, BUFSIZ, stdin)) {
350         buf[strlen(buf) - 1] = '\0';
351         printf("Inserting '%s' for item %p to hash table: %d\n",
352                buf, buf, hid);
353         hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
354         if (dist(mt) == 0)
355             strcpy(todelete, buf);
356     }
357 
358     printf("walking hash table...\n");
359     for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
360         printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
361                walker->item);
362     }
363     printf("done walking hash table...\n");
364 
365     if (todelete[0]) {
366         printf("deleting %s from %d\n", todelete, hid);
367         if (hash_delete(hid, todelete))
368             printf("hash_delete error\n");
369     }
370     printf("walking hash table...\n");
371     for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
372         printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
373                walker->item);
374     }
375     printf("done walking hash table...\n");
376 
377     printf("driver finished.\n");
378     exit(0);
379 }
380 #endif
381 
382