1*e4b17023SJohn Marino /* Hash tables.
2*e4b17023SJohn Marino Copyright (C) 2000, 2001, 2003, 2004, 2008, 2009
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino
5*e4b17023SJohn Marino This program is free software; you can redistribute it and/or modify it
6*e4b17023SJohn Marino under the terms of the GNU General Public License as published by the
7*e4b17023SJohn Marino Free Software Foundation; either version 3, or (at your option) any
8*e4b17023SJohn Marino later version.
9*e4b17023SJohn Marino
10*e4b17023SJohn Marino This program is distributed in the hope that it will be useful,
11*e4b17023SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of
12*e4b17023SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13*e4b17023SJohn Marino GNU General Public License for more details.
14*e4b17023SJohn Marino
15*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
16*e4b17023SJohn Marino along with this program; see the file COPYING3. If not see
17*e4b17023SJohn Marino <http://www.gnu.org/licenses/>.
18*e4b17023SJohn Marino
19*e4b17023SJohn Marino In other words, you are welcome to use, share and improve this program.
20*e4b17023SJohn Marino You are forbidden to forbid anyone else to use, share and improve
21*e4b17023SJohn Marino what you give them. Help stamp out software-hoarding! */
22*e4b17023SJohn Marino
23*e4b17023SJohn Marino #include "config.h"
24*e4b17023SJohn Marino #include "system.h"
25*e4b17023SJohn Marino #include "symtab.h"
26*e4b17023SJohn Marino
27*e4b17023SJohn Marino /* The code below is a specialization of Vladimir Makarov's expandable
28*e4b17023SJohn Marino hash tables (see libiberty/hashtab.c). The abstraction penalty was
29*e4b17023SJohn Marino too high to continue using the generic form. This code knows
30*e4b17023SJohn Marino intrinsically how to calculate a hash value, and how to compare an
31*e4b17023SJohn Marino existing entry with a potential new one. */
32*e4b17023SJohn Marino
33*e4b17023SJohn Marino static unsigned int calc_hash (const unsigned char *, size_t);
34*e4b17023SJohn Marino static void ht_expand (hash_table *);
35*e4b17023SJohn Marino static double approx_sqrt (double);
36*e4b17023SJohn Marino
37*e4b17023SJohn Marino /* A deleted entry. */
38*e4b17023SJohn Marino #define DELETED ((hashnode) -1)
39*e4b17023SJohn Marino
40*e4b17023SJohn Marino /* Calculate the hash of the string STR of length LEN. */
41*e4b17023SJohn Marino
42*e4b17023SJohn Marino static unsigned int
calc_hash(const unsigned char * str,size_t len)43*e4b17023SJohn Marino calc_hash (const unsigned char *str, size_t len)
44*e4b17023SJohn Marino {
45*e4b17023SJohn Marino size_t n = len;
46*e4b17023SJohn Marino unsigned int r = 0;
47*e4b17023SJohn Marino
48*e4b17023SJohn Marino while (n--)
49*e4b17023SJohn Marino r = HT_HASHSTEP (r, *str++);
50*e4b17023SJohn Marino
51*e4b17023SJohn Marino return HT_HASHFINISH (r, len);
52*e4b17023SJohn Marino }
53*e4b17023SJohn Marino
54*e4b17023SJohn Marino /* Initialize an identifier hashtable. */
55*e4b17023SJohn Marino
56*e4b17023SJohn Marino hash_table *
ht_create(unsigned int order)57*e4b17023SJohn Marino ht_create (unsigned int order)
58*e4b17023SJohn Marino {
59*e4b17023SJohn Marino unsigned int nslots = 1 << order;
60*e4b17023SJohn Marino hash_table *table;
61*e4b17023SJohn Marino
62*e4b17023SJohn Marino table = XCNEW (hash_table);
63*e4b17023SJohn Marino
64*e4b17023SJohn Marino /* Strings need no alignment. */
65*e4b17023SJohn Marino _obstack_begin (&table->stack, 0, 0,
66*e4b17023SJohn Marino (void *(*) (long)) xmalloc,
67*e4b17023SJohn Marino (void (*) (void *)) free);
68*e4b17023SJohn Marino
69*e4b17023SJohn Marino obstack_alignment_mask (&table->stack) = 0;
70*e4b17023SJohn Marino
71*e4b17023SJohn Marino table->entries = XCNEWVEC (hashnode, nslots);
72*e4b17023SJohn Marino table->entries_owned = true;
73*e4b17023SJohn Marino table->nslots = nslots;
74*e4b17023SJohn Marino return table;
75*e4b17023SJohn Marino }
76*e4b17023SJohn Marino
77*e4b17023SJohn Marino /* Frees all memory associated with a hash table. */
78*e4b17023SJohn Marino
79*e4b17023SJohn Marino void
ht_destroy(hash_table * table)80*e4b17023SJohn Marino ht_destroy (hash_table *table)
81*e4b17023SJohn Marino {
82*e4b17023SJohn Marino obstack_free (&table->stack, NULL);
83*e4b17023SJohn Marino if (table->entries_owned)
84*e4b17023SJohn Marino free (table->entries);
85*e4b17023SJohn Marino free (table);
86*e4b17023SJohn Marino }
87*e4b17023SJohn Marino
88*e4b17023SJohn Marino /* Returns the hash entry for the a STR of length LEN. If that string
89*e4b17023SJohn Marino already exists in the table, returns the existing entry. If the
90*e4b17023SJohn Marino identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
91*e4b17023SJohn Marino returns NULL. Otherwise insert and returns a new entry. A new
92*e4b17023SJohn Marino string is allocated. */
93*e4b17023SJohn Marino hashnode
ht_lookup(hash_table * table,const unsigned char * str,size_t len,enum ht_lookup_option insert)94*e4b17023SJohn Marino ht_lookup (hash_table *table, const unsigned char *str, size_t len,
95*e4b17023SJohn Marino enum ht_lookup_option insert)
96*e4b17023SJohn Marino {
97*e4b17023SJohn Marino return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
98*e4b17023SJohn Marino insert);
99*e4b17023SJohn Marino }
100*e4b17023SJohn Marino
101*e4b17023SJohn Marino hashnode
ht_lookup_with_hash(hash_table * table,const unsigned char * str,size_t len,unsigned int hash,enum ht_lookup_option insert)102*e4b17023SJohn Marino ht_lookup_with_hash (hash_table *table, const unsigned char *str,
103*e4b17023SJohn Marino size_t len, unsigned int hash,
104*e4b17023SJohn Marino enum ht_lookup_option insert)
105*e4b17023SJohn Marino {
106*e4b17023SJohn Marino unsigned int hash2;
107*e4b17023SJohn Marino unsigned int index;
108*e4b17023SJohn Marino unsigned int deleted_index = table->nslots;
109*e4b17023SJohn Marino size_t sizemask;
110*e4b17023SJohn Marino hashnode node;
111*e4b17023SJohn Marino
112*e4b17023SJohn Marino sizemask = table->nslots - 1;
113*e4b17023SJohn Marino index = hash & sizemask;
114*e4b17023SJohn Marino table->searches++;
115*e4b17023SJohn Marino
116*e4b17023SJohn Marino node = table->entries[index];
117*e4b17023SJohn Marino
118*e4b17023SJohn Marino if (node != NULL)
119*e4b17023SJohn Marino {
120*e4b17023SJohn Marino if (node == DELETED)
121*e4b17023SJohn Marino deleted_index = index;
122*e4b17023SJohn Marino else if (node->hash_value == hash
123*e4b17023SJohn Marino && HT_LEN (node) == (unsigned int) len
124*e4b17023SJohn Marino && !memcmp (HT_STR (node), str, len))
125*e4b17023SJohn Marino return node;
126*e4b17023SJohn Marino
127*e4b17023SJohn Marino /* hash2 must be odd, so we're guaranteed to visit every possible
128*e4b17023SJohn Marino location in the table during rehashing. */
129*e4b17023SJohn Marino hash2 = ((hash * 17) & sizemask) | 1;
130*e4b17023SJohn Marino
131*e4b17023SJohn Marino for (;;)
132*e4b17023SJohn Marino {
133*e4b17023SJohn Marino table->collisions++;
134*e4b17023SJohn Marino index = (index + hash2) & sizemask;
135*e4b17023SJohn Marino node = table->entries[index];
136*e4b17023SJohn Marino if (node == NULL)
137*e4b17023SJohn Marino break;
138*e4b17023SJohn Marino
139*e4b17023SJohn Marino if (node == DELETED)
140*e4b17023SJohn Marino {
141*e4b17023SJohn Marino if (deleted_index != table->nslots)
142*e4b17023SJohn Marino deleted_index = index;
143*e4b17023SJohn Marino }
144*e4b17023SJohn Marino else if (node->hash_value == hash
145*e4b17023SJohn Marino && HT_LEN (node) == (unsigned int) len
146*e4b17023SJohn Marino && !memcmp (HT_STR (node), str, len))
147*e4b17023SJohn Marino return node;
148*e4b17023SJohn Marino }
149*e4b17023SJohn Marino }
150*e4b17023SJohn Marino
151*e4b17023SJohn Marino if (insert == HT_NO_INSERT)
152*e4b17023SJohn Marino return NULL;
153*e4b17023SJohn Marino
154*e4b17023SJohn Marino /* We prefer to overwrite the first deleted slot we saw. */
155*e4b17023SJohn Marino if (deleted_index != table->nslots)
156*e4b17023SJohn Marino index = deleted_index;
157*e4b17023SJohn Marino
158*e4b17023SJohn Marino node = (*table->alloc_node) (table);
159*e4b17023SJohn Marino table->entries[index] = node;
160*e4b17023SJohn Marino
161*e4b17023SJohn Marino HT_LEN (node) = (unsigned int) len;
162*e4b17023SJohn Marino node->hash_value = hash;
163*e4b17023SJohn Marino
164*e4b17023SJohn Marino if (table->alloc_subobject)
165*e4b17023SJohn Marino {
166*e4b17023SJohn Marino char *chars = (char *) table->alloc_subobject (len + 1);
167*e4b17023SJohn Marino memcpy (chars, str, len);
168*e4b17023SJohn Marino chars[len] = '\0';
169*e4b17023SJohn Marino HT_STR (node) = (const unsigned char *) chars;
170*e4b17023SJohn Marino }
171*e4b17023SJohn Marino else
172*e4b17023SJohn Marino HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
173*e4b17023SJohn Marino str, len);
174*e4b17023SJohn Marino
175*e4b17023SJohn Marino if (++table->nelements * 4 >= table->nslots * 3)
176*e4b17023SJohn Marino /* Must expand the string table. */
177*e4b17023SJohn Marino ht_expand (table);
178*e4b17023SJohn Marino
179*e4b17023SJohn Marino return node;
180*e4b17023SJohn Marino }
181*e4b17023SJohn Marino
182*e4b17023SJohn Marino /* Double the size of a hash table, re-hashing existing entries. */
183*e4b17023SJohn Marino
184*e4b17023SJohn Marino static void
ht_expand(hash_table * table)185*e4b17023SJohn Marino ht_expand (hash_table *table)
186*e4b17023SJohn Marino {
187*e4b17023SJohn Marino hashnode *nentries, *p, *limit;
188*e4b17023SJohn Marino unsigned int size, sizemask;
189*e4b17023SJohn Marino
190*e4b17023SJohn Marino size = table->nslots * 2;
191*e4b17023SJohn Marino nentries = XCNEWVEC (hashnode, size);
192*e4b17023SJohn Marino sizemask = size - 1;
193*e4b17023SJohn Marino
194*e4b17023SJohn Marino p = table->entries;
195*e4b17023SJohn Marino limit = p + table->nslots;
196*e4b17023SJohn Marino do
197*e4b17023SJohn Marino if (*p && *p != DELETED)
198*e4b17023SJohn Marino {
199*e4b17023SJohn Marino unsigned int index, hash, hash2;
200*e4b17023SJohn Marino
201*e4b17023SJohn Marino hash = (*p)->hash_value;
202*e4b17023SJohn Marino index = hash & sizemask;
203*e4b17023SJohn Marino
204*e4b17023SJohn Marino if (nentries[index])
205*e4b17023SJohn Marino {
206*e4b17023SJohn Marino hash2 = ((hash * 17) & sizemask) | 1;
207*e4b17023SJohn Marino do
208*e4b17023SJohn Marino {
209*e4b17023SJohn Marino index = (index + hash2) & sizemask;
210*e4b17023SJohn Marino }
211*e4b17023SJohn Marino while (nentries[index]);
212*e4b17023SJohn Marino }
213*e4b17023SJohn Marino nentries[index] = *p;
214*e4b17023SJohn Marino }
215*e4b17023SJohn Marino while (++p < limit);
216*e4b17023SJohn Marino
217*e4b17023SJohn Marino if (table->entries_owned)
218*e4b17023SJohn Marino free (table->entries);
219*e4b17023SJohn Marino table->entries_owned = true;
220*e4b17023SJohn Marino table->entries = nentries;
221*e4b17023SJohn Marino table->nslots = size;
222*e4b17023SJohn Marino }
223*e4b17023SJohn Marino
224*e4b17023SJohn Marino /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
225*e4b17023SJohn Marino the node, and V. */
226*e4b17023SJohn Marino void
ht_forall(hash_table * table,ht_cb cb,const void * v)227*e4b17023SJohn Marino ht_forall (hash_table *table, ht_cb cb, const void *v)
228*e4b17023SJohn Marino {
229*e4b17023SJohn Marino hashnode *p, *limit;
230*e4b17023SJohn Marino
231*e4b17023SJohn Marino p = table->entries;
232*e4b17023SJohn Marino limit = p + table->nslots;
233*e4b17023SJohn Marino do
234*e4b17023SJohn Marino if (*p && *p != DELETED)
235*e4b17023SJohn Marino {
236*e4b17023SJohn Marino if ((*cb) (table->pfile, *p, v) == 0)
237*e4b17023SJohn Marino break;
238*e4b17023SJohn Marino }
239*e4b17023SJohn Marino while (++p < limit);
240*e4b17023SJohn Marino }
241*e4b17023SJohn Marino
242*e4b17023SJohn Marino /* Like ht_forall, but a nonzero return from the callback means that
243*e4b17023SJohn Marino the entry should be removed from the table. */
244*e4b17023SJohn Marino void
ht_purge(hash_table * table,ht_cb cb,const void * v)245*e4b17023SJohn Marino ht_purge (hash_table *table, ht_cb cb, const void *v)
246*e4b17023SJohn Marino {
247*e4b17023SJohn Marino hashnode *p, *limit;
248*e4b17023SJohn Marino
249*e4b17023SJohn Marino p = table->entries;
250*e4b17023SJohn Marino limit = p + table->nslots;
251*e4b17023SJohn Marino do
252*e4b17023SJohn Marino if (*p && *p != DELETED)
253*e4b17023SJohn Marino {
254*e4b17023SJohn Marino if ((*cb) (table->pfile, *p, v))
255*e4b17023SJohn Marino *p = DELETED;
256*e4b17023SJohn Marino }
257*e4b17023SJohn Marino while (++p < limit);
258*e4b17023SJohn Marino }
259*e4b17023SJohn Marino
260*e4b17023SJohn Marino /* Restore the hash table. */
261*e4b17023SJohn Marino void
ht_load(hash_table * ht,hashnode * entries,unsigned int nslots,unsigned int nelements,bool own)262*e4b17023SJohn Marino ht_load (hash_table *ht, hashnode *entries,
263*e4b17023SJohn Marino unsigned int nslots, unsigned int nelements,
264*e4b17023SJohn Marino bool own)
265*e4b17023SJohn Marino {
266*e4b17023SJohn Marino if (ht->entries_owned)
267*e4b17023SJohn Marino free (ht->entries);
268*e4b17023SJohn Marino ht->entries = entries;
269*e4b17023SJohn Marino ht->nslots = nslots;
270*e4b17023SJohn Marino ht->nelements = nelements;
271*e4b17023SJohn Marino ht->entries_owned = own;
272*e4b17023SJohn Marino }
273*e4b17023SJohn Marino
274*e4b17023SJohn Marino /* Dump allocation statistics to stderr. */
275*e4b17023SJohn Marino
276*e4b17023SJohn Marino void
ht_dump_statistics(hash_table * table)277*e4b17023SJohn Marino ht_dump_statistics (hash_table *table)
278*e4b17023SJohn Marino {
279*e4b17023SJohn Marino size_t nelts, nids, overhead, headers;
280*e4b17023SJohn Marino size_t total_bytes, longest, deleted = 0;
281*e4b17023SJohn Marino double sum_of_squares, exp_len, exp_len2, exp2_len;
282*e4b17023SJohn Marino hashnode *p, *limit;
283*e4b17023SJohn Marino
284*e4b17023SJohn Marino #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
285*e4b17023SJohn Marino ? (x) \
286*e4b17023SJohn Marino : ((x) < 1024*1024*10 \
287*e4b17023SJohn Marino ? (x) / 1024 \
288*e4b17023SJohn Marino : (x) / (1024*1024))))
289*e4b17023SJohn Marino #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
290*e4b17023SJohn Marino
291*e4b17023SJohn Marino total_bytes = longest = sum_of_squares = nids = 0;
292*e4b17023SJohn Marino p = table->entries;
293*e4b17023SJohn Marino limit = p + table->nslots;
294*e4b17023SJohn Marino do
295*e4b17023SJohn Marino if (*p == DELETED)
296*e4b17023SJohn Marino ++deleted;
297*e4b17023SJohn Marino else if (*p)
298*e4b17023SJohn Marino {
299*e4b17023SJohn Marino size_t n = HT_LEN (*p);
300*e4b17023SJohn Marino
301*e4b17023SJohn Marino total_bytes += n;
302*e4b17023SJohn Marino sum_of_squares += (double) n * n;
303*e4b17023SJohn Marino if (n > longest)
304*e4b17023SJohn Marino longest = n;
305*e4b17023SJohn Marino nids++;
306*e4b17023SJohn Marino }
307*e4b17023SJohn Marino while (++p < limit);
308*e4b17023SJohn Marino
309*e4b17023SJohn Marino nelts = table->nelements;
310*e4b17023SJohn Marino overhead = obstack_memory_used (&table->stack) - total_bytes;
311*e4b17023SJohn Marino headers = table->nslots * sizeof (hashnode);
312*e4b17023SJohn Marino
313*e4b17023SJohn Marino fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
314*e4b17023SJohn Marino (unsigned long) nelts);
315*e4b17023SJohn Marino fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
316*e4b17023SJohn Marino (unsigned long) nids, nids * 100.0 / nelts);
317*e4b17023SJohn Marino fprintf (stderr, "slots\t\t%lu\n",
318*e4b17023SJohn Marino (unsigned long) table->nslots);
319*e4b17023SJohn Marino fprintf (stderr, "deleted\t\t%lu\n",
320*e4b17023SJohn Marino (unsigned long) deleted);
321*e4b17023SJohn Marino fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
322*e4b17023SJohn Marino SCALE (total_bytes), LABEL (total_bytes),
323*e4b17023SJohn Marino SCALE (overhead), LABEL (overhead));
324*e4b17023SJohn Marino fprintf (stderr, "table size\t%lu%c\n",
325*e4b17023SJohn Marino SCALE (headers), LABEL (headers));
326*e4b17023SJohn Marino
327*e4b17023SJohn Marino exp_len = (double)total_bytes / (double)nelts;
328*e4b17023SJohn Marino exp2_len = exp_len * exp_len;
329*e4b17023SJohn Marino exp_len2 = (double) sum_of_squares / (double) nelts;
330*e4b17023SJohn Marino
331*e4b17023SJohn Marino fprintf (stderr, "coll/search\t%.4f\n",
332*e4b17023SJohn Marino (double) table->collisions / (double) table->searches);
333*e4b17023SJohn Marino fprintf (stderr, "ins/search\t%.4f\n",
334*e4b17023SJohn Marino (double) nelts / (double) table->searches);
335*e4b17023SJohn Marino fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
336*e4b17023SJohn Marino exp_len, approx_sqrt (exp_len2 - exp2_len));
337*e4b17023SJohn Marino fprintf (stderr, "longest entry\t%lu\n",
338*e4b17023SJohn Marino (unsigned long) longest);
339*e4b17023SJohn Marino #undef SCALE
340*e4b17023SJohn Marino #undef LABEL
341*e4b17023SJohn Marino }
342*e4b17023SJohn Marino
343*e4b17023SJohn Marino /* Return the approximate positive square root of a number N. This is for
344*e4b17023SJohn Marino statistical reports, not code generation. */
345*e4b17023SJohn Marino static double
approx_sqrt(double x)346*e4b17023SJohn Marino approx_sqrt (double x)
347*e4b17023SJohn Marino {
348*e4b17023SJohn Marino double s, d;
349*e4b17023SJohn Marino
350*e4b17023SJohn Marino if (x < 0)
351*e4b17023SJohn Marino abort ();
352*e4b17023SJohn Marino if (x == 0)
353*e4b17023SJohn Marino return 0;
354*e4b17023SJohn Marino
355*e4b17023SJohn Marino s = x;
356*e4b17023SJohn Marino do
357*e4b17023SJohn Marino {
358*e4b17023SJohn Marino d = (s * s - x) / (2 * s);
359*e4b17023SJohn Marino s -= d;
360*e4b17023SJohn Marino }
361*e4b17023SJohn Marino while (d > .0001);
362*e4b17023SJohn Marino return s;
363*e4b17023SJohn Marino }
364