1 /* Provides high-level routines to manipulate the keywork list
2    structures the code generation output.
3    Copyright (C) 1989 Free Software Foundation, Inc.
4    written by Douglas C. Schmidt (schmidt@ics.uci.edu)
5 
6 This file is part of GNU GPERF.
7 
8 GNU GPERF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 1, or (at your option)
11 any later version.
12 
13 GNU GPERF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GNU GPERF; see the file COPYING.  If not, write to
20 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
21 
22 #include <stdio.h>
23 #include <assert.h>
24 #include <ctype.h>
25 #include "options.h"
26 #include "perfect.h"
27 #include "stderr.h"
28 
29 /* Current release version. */
30 extern char *version_string;
31 
32 /* Counts occurrences of each key set character. */
33 int occurrences[ALPHABET_SIZE];
34 
35 /* Value associated with each character. */
36 int asso_values[ALPHABET_SIZE];
37 
38 /* Locally visible PERFECT object. */
39 PERFECT perfect;
40 
41 /* Efficiently returns the least power of two greater than or equal to X! */
42 #define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X)))
43 
44 /* Reads input keys, possibly applies the reordering heuristic, sets the
45    maximum associated value size (rounded up to the nearest power of 2),
46    may initialize the associated values array, and determines the maximum
47    hash table size.  Note: using the random numbers is often helpful,
48    though not as deterministic, of course! */
49 
50 void
perfect_init()51 perfect_init ()
52 {
53   int asso_value_max;
54   int len;
55 
56   perfect.num_done = 1;
57   perfect.fewest_collisions = 0;
58   read_keys ();
59   if (OPTION_ENABLED (option, ORDER))
60     reorder ();
61   asso_value_max = GET_ASSO_MAX (option);
62   len            = keyword_list_length ();
63   asso_value_max = (asso_value_max ? asso_value_max * len : len);
64   SET_ASSO_MAX (option, POW (asso_value_max));
65 
66   if (OPTION_ENABLED (option, RANDOM))
67     {
68       int i;
69 
70       srandom (time (0));
71 
72       for (i = 0; i < ALPHABET_SIZE; i++)
73         asso_values[i] = (random () & asso_value_max - 1);
74     }
75   else
76     {
77       int asso_value = INITIAL_VALUE (option);
78       if (asso_value)           /* Initialize array if user requests non-zero default. */
79         {
80           int i;
81 
82           for (i = ALPHABET_SIZE - 1; i >= 0; i--)
83             asso_values[i] = asso_value & GET_ASSO_MAX (option) - 1;
84         }
85     }
86   perfect.max_hash_value = max_key_length () + GET_ASSO_MAX (option) *
87     GET_CHARSET_SIZE (option);
88 
89   printf ("/* C code produced by gperf version %s */\n", version_string);
90   print_options ();
91 
92   if (OPTION_ENABLED (option, NOCASE))
93     printf ("#include <ctype.h>\n\n");
94 
95   if (OPTION_ENABLED (option, DEBUG))
96     {
97       int i;
98       fprintf (stderr, "\nnumber of keys = %d\nmaximum associated value is %d\
99 \nmaximum possible size of generated hash table is %d\n",
100                len, asso_value_max, perfect.max_hash_value);
101     }
102 }
103 
104 /* Merge two hash key multisets to form the ordered disjoint union of the sets.
105    (In a multiset, an element can occur multiple times). Precondition: both
106    set_1 and set_2 must be ordered. Returns the length of the combined set. */
107 
108 static int
compute_disjoint_union(set_1,set_2,set_3)109 compute_disjoint_union (set_1, set_2, set_3)
110      char *set_1;
111      char *set_2;
112      char *set_3;
113 {
114   char *base = set_3;
115 
116   while (*set_1 && *set_2)
117     if (*set_1 == *set_2)
118       set_1++, set_2++;
119     else
120       {
121         *set_3 = *set_1 < *set_2 ? *set_1++ : *set_2++;
122         if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
123       }
124 
125   while (*set_1)
126     {
127       *set_3 = *set_1++;
128       if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
129     }
130 
131   while (*set_2)
132     {
133       *set_3 = *set_2++;
134       if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
135     }
136   *set_3 = '\0';
137   return set_3 - base;
138 }
139 
140 /* Sort the UNION_SET in increasing frequency of occurrence.
141    This speeds up later processing since we may assume the resulting
142    set (Set_3, in this case), is ordered. Uses insertion sort, since
143    the UNION_SET is typically short. */
144 
145 static void
sort_set(union_set,len)146 sort_set (union_set, len)
147      char *union_set;
148      int   len;
149 {
150   int i, j;
151 
152   for (i = 0, j = len - 1; i < j; i++)
153     {
154       char curr, tmp;
155 
156       for (curr = i+1, tmp = union_set[curr];
157            curr > 0 && occurrences[tmp] < occurrences[union_set[curr-1]];
158            curr--)
159         union_set[curr] = union_set[curr - 1];
160 
161       union_set[curr] = tmp;
162     }
163 }
164 
165 /* Generate a key set's hash value. */
166 
167 static int
hash(key_node)168 hash (key_node)
169      LIST_NODE *key_node;
170 {
171   int   sum = OPTION_ENABLED (option, NOLENGTH) ? 0 : key_node->length;
172   char *ptr;
173 
174   for (ptr = key_node->char_set; *ptr; ptr++)
175       sum += asso_values[*ptr];
176 
177   return key_node->hash_value = sum;
178 }
179 
180 /* Find out how associated value changes affect successfully hashed items.
181    Returns FALSE if no other hash values are affected, else returns TRUE.
182    Note that because GET_ASSO_MAX (option) is a power of two we can guarantee
183    that all legal ASSO_VALUES are visited without repetition since
184    GET_JUMP (option) was forced to be an odd value! */
185 
186 static bool
affects_prev(c,curr)187 affects_prev (c, curr)
188      char c;
189      LIST_NODE *curr;
190 {
191   int original_char = asso_values[c];
192   int i = !OPTION_ENABLED (option, FAST) ? GET_ASSO_MAX (option) :
193     GET_ITERATIONS (option) == 0 ? key_list.list_len : GET_ITERATIONS (option);
194 
195   /* Try all asso_values. */
196 
197   while (--i >= 0)
198     {
199       int        collisions = 0;
200       LIST_NODE *ptr;
201 
202       asso_values[c] = asso_values[c] + (GET_JUMP (option) ? GET_JUMP (option) : random ())
203         & GET_ASSO_MAX (option) - 1;
204       bool_array_reset ();
205 
206       /* See how this asso_value change affects previous keywords.  If
207          it does better than before we'll take it! */
208 
209       for (ptr = key_list.head;
210            !lookup (hash (ptr)) || ++collisions < perfect.fewest_collisions;
211            ptr = ptr->next)
212         if (ptr == curr)
213           {
214             perfect.fewest_collisions = collisions;
215             return FALSE;
216           }
217     }
218 
219   asso_values[c] = original_char; /* Restore original values, no more tries. */
220   return TRUE; /* If we're this far it's time to try the next character.... */
221 }
222 
223 /* Change a character value, try least-used characters first. */
224 
225 static void
change(prior,curr)226 change (prior, curr)
227      LIST_NODE *prior;
228      LIST_NODE *curr;
229 {
230   char        *xmalloc ();
231   static char *union_set = 0;
232   char        *temp;
233   LIST_NODE   *ptr;
234 
235   if (!union_set)
236     union_set = xmalloc (2 * GET_CHARSET_SIZE (option) + 1);
237 
238   if (OPTION_ENABLED (option, DEBUG)) /* Very useful for debugging. */
239     {
240       fprintf (stderr, "collision on keyword #%d, prior=\"%s\", curr=\"%s\", hash=%d\n",
241                perfect.num_done, prior->key, curr->key, curr->hash_value);
242       fflush (stderr);
243     }
244   sort_set (union_set, compute_disjoint_union (prior->char_set, curr->char_set, union_set));
245 
246   /* Try changing some values, if change doesn't alter other values continue normal action. */
247 
248   perfect.fewest_collisions++;
249 
250   for (temp = union_set; *temp; temp++)
251     if (!affects_prev (*temp, curr))
252       {
253         if (OPTION_ENABLED (option, DEBUG))
254           {
255             fprintf (stderr, "- resolved by changing asso_value['%c'] (char #%d) to %d\n",
256                      *temp, temp - union_set + 1, asso_values[*temp]);
257             fflush (stderr);
258           }
259         return; /* Good, doesn't affect previous hash values, we'll take it. */
260       }
261 
262   for (ptr = key_list.head; ptr != curr; ptr = ptr->next)
263     hash (ptr);
264 
265   hash (curr);
266 
267   if (OPTION_ENABLED (option, DEBUG))
268     {
269       fprintf (stderr, "** collision not resolved, %d duplicates remain, continuing...\n",
270                perfect.fewest_collisions);
271       fflush (stderr);
272     }
273 }
274 
275 /* Does the hard stuff....
276    Initializes the Iteration Number boolean array, and then trys to find a
277    perfect function that will hash all the key words without getting any
278    duplications.  This is made much easier since we aren't attempting
279    to generate *minimum* functions, only perfect ones.
280    If we can't generate a perfect function in one pass *and* the user
281    hasn't enabled the DUP option, we'll inform the user to try the
282    randomization option, use -D, or choose alternative key positions.
283    The alternatives (e.g., back-tracking) are too time-consuming, i.e,
284    exponential in the number of keys. */
285 
286 int
perfect_generate()287 perfect_generate ()
288 {
289   LIST_NODE *curr;
290   bool_array_init (perfect.max_hash_value);
291 
292   for (curr = key_list.head; curr; curr = curr->next)
293     {
294       LIST_NODE *ptr;
295       hash (curr);
296 
297       for (ptr = key_list.head; ptr != curr; ptr = ptr->next)
298         if (ptr->hash_value == curr->hash_value)
299           {
300             change (ptr, curr);
301             break;
302           }
303       perfect.num_done++;
304     }
305 
306 
307   /* Make one final check, just to make sure nothing weird happened.... */
308   bool_array_reset ();
309 
310   for (curr = key_list.head; curr; curr = curr->next)
311     if (lookup (hash (curr)))
312       if (OPTION_ENABLED (option, DUP)) /* We'll try to deal with this later..... */
313         break;
314       else /* Yow, big problems.  we're outta here! */
315         {
316           report_error ("\nInternal error, duplicate value %d:\n\
317 try options -D or -r, or use new key positions.\n\n",
318                         hash (curr));
319           return 1;
320         }
321 
322   bool_array_destroy ();
323 
324   /* First sorts the key word list by hash value, and the outputs the
325      list to the proper ostream. The generated hash table code is only
326      output if the early stage of processing turned out O.K. */
327 
328   sort ();
329   print_output ();
330   return 0;
331 }
332 
333 /* Prints out some diagnostics upon completion. */
334 
335 void
perfect_destroy()336 perfect_destroy ()
337 {
338   if (OPTION_ENABLED (option, DEBUG))
339     {
340       int i;
341 
342       fprintf (stderr, "\ndumping occurrence and associated values tables\n");
343 
344       for (i = 0; i < ALPHABET_SIZE; i++)
345         if (occurrences[i])
346           fprintf (stderr, "asso_values[%c] = %3d, occurrences[%c] = %3d\n",
347                    i, asso_values[i], i, occurrences[i]);
348 
349       fprintf (stderr, "end table dumping\n");
350 
351     }
352 }
353 
354