1 /* Provides high-level routines to manipulate the keywork list
2 structures the code generation output.
3 Copyright (C) 1989 Free Software Foundation, Inc.
4 written by Douglas C. Schmidt (schmidt@ics.uci.edu)
5
6 This file is part of GNU GPERF.
7
8 GNU GPERF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 1, or (at your option)
11 any later version.
12
13 GNU GPERF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU GPERF; see the file COPYING. If not, write to
20 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
21
22 #include <stdio.h>
23 #include <assert.h>
24 #include <ctype.h>
25 #include "options.h"
26 #include "perfect.h"
27 #include "stderr.h"
28
29 /* Current release version. */
30 extern char *version_string;
31
32 /* Counts occurrences of each key set character. */
33 int occurrences[ALPHABET_SIZE];
34
35 /* Value associated with each character. */
36 int asso_values[ALPHABET_SIZE];
37
38 /* Locally visible PERFECT object. */
39 PERFECT perfect;
40
41 /* Efficiently returns the least power of two greater than or equal to X! */
42 #define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X)))
43
44 /* Reads input keys, possibly applies the reordering heuristic, sets the
45 maximum associated value size (rounded up to the nearest power of 2),
46 may initialize the associated values array, and determines the maximum
47 hash table size. Note: using the random numbers is often helpful,
48 though not as deterministic, of course! */
49
50 void
perfect_init()51 perfect_init ()
52 {
53 int asso_value_max;
54 int len;
55
56 perfect.num_done = 1;
57 perfect.fewest_collisions = 0;
58 read_keys ();
59 if (OPTION_ENABLED (option, ORDER))
60 reorder ();
61 asso_value_max = GET_ASSO_MAX (option);
62 len = keyword_list_length ();
63 asso_value_max = (asso_value_max ? asso_value_max * len : len);
64 SET_ASSO_MAX (option, POW (asso_value_max));
65
66 if (OPTION_ENABLED (option, RANDOM))
67 {
68 int i;
69
70 srandom (time (0));
71
72 for (i = 0; i < ALPHABET_SIZE; i++)
73 asso_values[i] = (random () & asso_value_max - 1);
74 }
75 else
76 {
77 int asso_value = INITIAL_VALUE (option);
78 if (asso_value) /* Initialize array if user requests non-zero default. */
79 {
80 int i;
81
82 for (i = ALPHABET_SIZE - 1; i >= 0; i--)
83 asso_values[i] = asso_value & GET_ASSO_MAX (option) - 1;
84 }
85 }
86 perfect.max_hash_value = max_key_length () + GET_ASSO_MAX (option) *
87 GET_CHARSET_SIZE (option);
88
89 printf ("/* C code produced by gperf version %s */\n", version_string);
90 print_options ();
91
92 if (OPTION_ENABLED (option, NOCASE))
93 printf ("#include <ctype.h>\n\n");
94
95 if (OPTION_ENABLED (option, DEBUG))
96 {
97 int i;
98 fprintf (stderr, "\nnumber of keys = %d\nmaximum associated value is %d\
99 \nmaximum possible size of generated hash table is %d\n",
100 len, asso_value_max, perfect.max_hash_value);
101 }
102 }
103
104 /* Merge two hash key multisets to form the ordered disjoint union of the sets.
105 (In a multiset, an element can occur multiple times). Precondition: both
106 set_1 and set_2 must be ordered. Returns the length of the combined set. */
107
108 static int
compute_disjoint_union(set_1,set_2,set_3)109 compute_disjoint_union (set_1, set_2, set_3)
110 char *set_1;
111 char *set_2;
112 char *set_3;
113 {
114 char *base = set_3;
115
116 while (*set_1 && *set_2)
117 if (*set_1 == *set_2)
118 set_1++, set_2++;
119 else
120 {
121 *set_3 = *set_1 < *set_2 ? *set_1++ : *set_2++;
122 if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
123 }
124
125 while (*set_1)
126 {
127 *set_3 = *set_1++;
128 if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
129 }
130
131 while (*set_2)
132 {
133 *set_3 = *set_2++;
134 if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
135 }
136 *set_3 = '\0';
137 return set_3 - base;
138 }
139
140 /* Sort the UNION_SET in increasing frequency of occurrence.
141 This speeds up later processing since we may assume the resulting
142 set (Set_3, in this case), is ordered. Uses insertion sort, since
143 the UNION_SET is typically short. */
144
145 static void
sort_set(union_set,len)146 sort_set (union_set, len)
147 char *union_set;
148 int len;
149 {
150 int i, j;
151
152 for (i = 0, j = len - 1; i < j; i++)
153 {
154 char curr, tmp;
155
156 for (curr = i+1, tmp = union_set[curr];
157 curr > 0 && occurrences[tmp] < occurrences[union_set[curr-1]];
158 curr--)
159 union_set[curr] = union_set[curr - 1];
160
161 union_set[curr] = tmp;
162 }
163 }
164
165 /* Generate a key set's hash value. */
166
167 static int
hash(key_node)168 hash (key_node)
169 LIST_NODE *key_node;
170 {
171 int sum = OPTION_ENABLED (option, NOLENGTH) ? 0 : key_node->length;
172 char *ptr;
173
174 for (ptr = key_node->char_set; *ptr; ptr++)
175 sum += asso_values[*ptr];
176
177 return key_node->hash_value = sum;
178 }
179
180 /* Find out how associated value changes affect successfully hashed items.
181 Returns FALSE if no other hash values are affected, else returns TRUE.
182 Note that because GET_ASSO_MAX (option) is a power of two we can guarantee
183 that all legal ASSO_VALUES are visited without repetition since
184 GET_JUMP (option) was forced to be an odd value! */
185
186 static bool
affects_prev(c,curr)187 affects_prev (c, curr)
188 char c;
189 LIST_NODE *curr;
190 {
191 int original_char = asso_values[c];
192 int i = !OPTION_ENABLED (option, FAST) ? GET_ASSO_MAX (option) :
193 GET_ITERATIONS (option) == 0 ? key_list.list_len : GET_ITERATIONS (option);
194
195 /* Try all asso_values. */
196
197 while (--i >= 0)
198 {
199 int collisions = 0;
200 LIST_NODE *ptr;
201
202 asso_values[c] = asso_values[c] + (GET_JUMP (option) ? GET_JUMP (option) : random ())
203 & GET_ASSO_MAX (option) - 1;
204 bool_array_reset ();
205
206 /* See how this asso_value change affects previous keywords. If
207 it does better than before we'll take it! */
208
209 for (ptr = key_list.head;
210 !lookup (hash (ptr)) || ++collisions < perfect.fewest_collisions;
211 ptr = ptr->next)
212 if (ptr == curr)
213 {
214 perfect.fewest_collisions = collisions;
215 return FALSE;
216 }
217 }
218
219 asso_values[c] = original_char; /* Restore original values, no more tries. */
220 return TRUE; /* If we're this far it's time to try the next character.... */
221 }
222
223 /* Change a character value, try least-used characters first. */
224
225 static void
change(prior,curr)226 change (prior, curr)
227 LIST_NODE *prior;
228 LIST_NODE *curr;
229 {
230 char *xmalloc ();
231 static char *union_set = 0;
232 char *temp;
233 LIST_NODE *ptr;
234
235 if (!union_set)
236 union_set = xmalloc (2 * GET_CHARSET_SIZE (option) + 1);
237
238 if (OPTION_ENABLED (option, DEBUG)) /* Very useful for debugging. */
239 {
240 fprintf (stderr, "collision on keyword #%d, prior=\"%s\", curr=\"%s\", hash=%d\n",
241 perfect.num_done, prior->key, curr->key, curr->hash_value);
242 fflush (stderr);
243 }
244 sort_set (union_set, compute_disjoint_union (prior->char_set, curr->char_set, union_set));
245
246 /* Try changing some values, if change doesn't alter other values continue normal action. */
247
248 perfect.fewest_collisions++;
249
250 for (temp = union_set; *temp; temp++)
251 if (!affects_prev (*temp, curr))
252 {
253 if (OPTION_ENABLED (option, DEBUG))
254 {
255 fprintf (stderr, "- resolved by changing asso_value['%c'] (char #%d) to %d\n",
256 *temp, temp - union_set + 1, asso_values[*temp]);
257 fflush (stderr);
258 }
259 return; /* Good, doesn't affect previous hash values, we'll take it. */
260 }
261
262 for (ptr = key_list.head; ptr != curr; ptr = ptr->next)
263 hash (ptr);
264
265 hash (curr);
266
267 if (OPTION_ENABLED (option, DEBUG))
268 {
269 fprintf (stderr, "** collision not resolved, %d duplicates remain, continuing...\n",
270 perfect.fewest_collisions);
271 fflush (stderr);
272 }
273 }
274
275 /* Does the hard stuff....
276 Initializes the Iteration Number boolean array, and then trys to find a
277 perfect function that will hash all the key words without getting any
278 duplications. This is made much easier since we aren't attempting
279 to generate *minimum* functions, only perfect ones.
280 If we can't generate a perfect function in one pass *and* the user
281 hasn't enabled the DUP option, we'll inform the user to try the
282 randomization option, use -D, or choose alternative key positions.
283 The alternatives (e.g., back-tracking) are too time-consuming, i.e,
284 exponential in the number of keys. */
285
286 int
perfect_generate()287 perfect_generate ()
288 {
289 LIST_NODE *curr;
290 bool_array_init (perfect.max_hash_value);
291
292 for (curr = key_list.head; curr; curr = curr->next)
293 {
294 LIST_NODE *ptr;
295 hash (curr);
296
297 for (ptr = key_list.head; ptr != curr; ptr = ptr->next)
298 if (ptr->hash_value == curr->hash_value)
299 {
300 change (ptr, curr);
301 break;
302 }
303 perfect.num_done++;
304 }
305
306
307 /* Make one final check, just to make sure nothing weird happened.... */
308 bool_array_reset ();
309
310 for (curr = key_list.head; curr; curr = curr->next)
311 if (lookup (hash (curr)))
312 if (OPTION_ENABLED (option, DUP)) /* We'll try to deal with this later..... */
313 break;
314 else /* Yow, big problems. we're outta here! */
315 {
316 report_error ("\nInternal error, duplicate value %d:\n\
317 try options -D or -r, or use new key positions.\n\n",
318 hash (curr));
319 return 1;
320 }
321
322 bool_array_destroy ();
323
324 /* First sorts the key word list by hash value, and the outputs the
325 list to the proper ostream. The generated hash table code is only
326 output if the early stage of processing turned out O.K. */
327
328 sort ();
329 print_output ();
330 return 0;
331 }
332
333 /* Prints out some diagnostics upon completion. */
334
335 void
perfect_destroy()336 perfect_destroy ()
337 {
338 if (OPTION_ENABLED (option, DEBUG))
339 {
340 int i;
341
342 fprintf (stderr, "\ndumping occurrence and associated values tables\n");
343
344 for (i = 0; i < ALPHABET_SIZE; i++)
345 if (occurrences[i])
346 fprintf (stderr, "asso_values[%c] = %3d, occurrences[%c] = %3d\n",
347 i, asso_values[i], i, occurrences[i]);
348
349 fprintf (stderr, "end table dumping\n");
350
351 }
352 }
353
354