1 /* Grammar reduction for Bison.
2
3 Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2021 Free
4 Software Foundation, Inc.
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20
21
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
24
25 /* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
27
28 #include <config.h>
29 #include "system.h"
30
31 #include <bitset.h>
32
33 #include "complain.h"
34 #include "files.h"
35 #include "getargs.h"
36 #include "gram.h"
37 #include "print-xml.h"
38 #include "reader.h"
39 #include "reduce.h"
40 #include "symtab.h"
41
42 /* Set of nonterminals whose language is not empty. */
43 static bitset N;
44
45 /* Set of rules that have no useless nonterminals in their RHS. */
46 static bitset P;
47
48 /* Set of accessible symbols. */
49 static bitset V;
50
51 /* Set of symbols used to define rule precedence (so they are
52 'useless', but no warning should be issued). */
53 static bitset V1;
54
55 int nuseless_productions;
56 int nuseless_nonterminals;
57
58 #define bitset_swap(Lhs, Rhs) \
59 do { \
60 bitset lhs__ = Lhs; \
61 Lhs = Rhs; \
62 Rhs = lhs__; \
63 } while (0)
64
65 /*-------------------------------------------------------------------.
66 | Another way to do this would be with a set for each production and |
67 | then do subset tests against N0, but even for the C grammar the |
68 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
69 `-------------------------------------------------------------------*/
70
71 static bool
useful_production(rule_number r,bitset N0)72 useful_production (rule_number r, bitset N0)
73 {
74 /* A production is useful if all of the nonterminals in its appear
75 in the set of useful nonterminals. */
76
77 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
78 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
79 return false;
80 return true;
81 }
82
83
84 /*-----------------------------------------------------------------.
85 | Compute N, the set of nonterminals whose language is not empty. |
86 | |
87 | Remember that rules are 1-origin, symbols are 0-origin. |
88 `-----------------------------------------------------------------*/
89
90 static void
useless_nonterminals(void)91 useless_nonterminals (void)
92 {
93 /* N is set as built. Np is set being built this iteration. P is
94 set of all productions which have a RHS all in N. */
95
96 bitset Np = bitset_create (nnterms, BITSET_FIXED);
97
98 /* The set being computed is a set of nonterminals which can derive
99 the empty string or strings consisting of all terminals. At each
100 iteration a nonterminal is added to the set if there is a
101 production with that nonterminal as its LHS for which all the
102 nonterminals in its RHS are already in the set. Iterate until
103 the set being computed remains unchanged. Any nonterminals not
104 in the set at that point are useless in that they will never be
105 used in deriving a sentence of the language.
106
107 This iteration doesn't use any special traversal over the
108 productions. A set is kept of all productions for which all the
109 nonterminals in the RHS are in useful. Only productions not in
110 this set are scanned on each iteration. At the end, this set is
111 saved to be used when finding useful productions: only
112 productions in this set will appear in the final grammar. */
113
114 while (1)
115 {
116 bitset_copy (Np, N);
117 for (rule_number r = 0; r < nrules; ++r)
118 if (!bitset_test (P, r)
119 && useful_production (r, N))
120 {
121 bitset_set (Np, rules[r].lhs->number - ntokens);
122 bitset_set (P, r);
123 }
124 if (bitset_equal_p (N, Np))
125 break;
126 bitset_swap (N, Np);
127 }
128 bitset_free (N);
129 N = Np;
130 }
131
132
133 static void
inaccessable_symbols(void)134 inaccessable_symbols (void)
135 {
136 /* Find out which productions are reachable and which symbols are
137 used. Starting with an empty set of productions and a set of
138 symbols which only has the start symbol in it, iterate over all
139 productions until the set of productions remains unchanged for an
140 iteration. For each production which has a LHS in the set of
141 reachable symbols, add the production to the set of reachable
142 productions, and add all of the nonterminals in the RHS of the
143 production to the set of reachable symbols.
144
145 Consider only the (partially) reduced grammar which has only
146 nonterminals in N and productions in P.
147
148 The result is the set P of productions in the reduced grammar,
149 and the set V of symbols in the reduced grammar.
150
151 Although this algorithm also computes the set of terminals which
152 are reachable, no terminal will be deleted from the grammar. Some
153 terminals might not be in the grammar but might be generated by
154 semantic routines, and so the user might want them available with
155 specified numbers. (Is this true?) However, the nonreachable
156 terminals are printed (if running in verbose mode) so that the
157 user can know. */
158
159 bitset Vp = bitset_create (nsyms, BITSET_FIXED);
160 bitset Pp = bitset_create (nrules, BITSET_FIXED);
161
162 /* If the start symbol isn't useful, then nothing will be useful. */
163 if (bitset_test (N, acceptsymbol->content->number - ntokens))
164 {
165 bitset_set (V, acceptsymbol->content->number);
166
167 while (1)
168 {
169 bitset_copy (Vp, V);
170 for (rule_number r = 0; r < nrules; ++r)
171 if (!bitset_test (Pp, r)
172 && bitset_test (P, r)
173 && bitset_test (V, rules[r].lhs->number))
174 {
175 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
176 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
177 bitset_set (Vp, *rhsp);
178 bitset_set (Pp, r);
179 }
180 if (bitset_equal_p (V, Vp))
181 break;
182 bitset_swap (V, Vp);
183 }
184 }
185
186 bitset_free (V);
187 V = Vp;
188
189 /* These tokens (numbered 0, 1, and 2) are internal to Bison.
190 Consider them useful. */
191 bitset_set (V, eoftoken->content->number); /* end-of-input token */
192 bitset_set (V, errtoken->content->number); /* error token */
193 bitset_set (V, undeftoken->content->number); /* some undefined token */
194
195 bitset_free (P);
196 P = Pp;
197
198 int nuseful_productions = bitset_count (P);
199 nuseless_productions = nrules - nuseful_productions;
200
201 int nuseful_nonterminals = 0;
202 for (symbol_number i = ntokens; i < nsyms; ++i)
203 nuseful_nonterminals += bitset_test (V, i);
204 nuseless_nonterminals = nnterms - nuseful_nonterminals;
205
206 /* A token that was used in %prec should not be warned about. */
207 for (rule_number r = 0; r < nrules; ++r)
208 if (rules[r].precsym != 0)
209 bitset_set (V1, rules[r].precsym->number);
210 }
211
212
213 /*-------------------------------------------------------------------.
214 | Put the useless productions at the end of RULES, and adjust NRULES |
215 | accordingly. |
216 `-------------------------------------------------------------------*/
217
218 static void
reduce_grammar_tables(void)219 reduce_grammar_tables (void)
220 {
221 /* Report and flag useless productions. */
222 {
223 for (rule_number r = 0; r < nrules; ++r)
224 rules[r].useful = bitset_test (P, r);
225 grammar_rules_useless_report (_("rule useless in grammar"));
226 }
227
228 /* Map the nonterminals to their new index: useful first, useless
229 afterwards. Kept for later report. */
230 {
231 int useful = 0;
232 int useless = nrules - nuseless_productions;
233 rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
234 for (rule_number r = 0; r < nrules; ++r)
235 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
236 free (rules);
237 rules = rules_sorted;
238
239 /* Renumber the rules markers in RITEMS. */
240 for (rule_number r = 0; r < nrules; ++r)
241 {
242 item_number *rhsp = rules[r].rhs;
243 for (/* Nothing. */; 0 <= *rhsp; ++rhsp)
244 continue;
245 *rhsp = rule_number_as_item_number (r);
246 rules[r].number = r;
247 }
248 nrules -= nuseless_productions;
249 }
250
251 /* Adjust NRITEMS. */
252 for (rule_number r = nrules; r < nrules + nuseless_productions; ++r)
253 nritems -= rule_rhs_length (&rules[r]) + 1;
254 }
255
256
257 /*------------------------------.
258 | Remove useless nonterminals. |
259 `------------------------------*/
260
261 symbol_number *nterm_map = NULL;
262
263 static void
nonterminals_reduce(void)264 nonterminals_reduce (void)
265 {
266 nterm_map = xnmalloc (nnterms, sizeof *nterm_map);
267 /* Map the nonterminals to their new index: useful first, useless
268 afterwards. Kept for later report. */
269 {
270 symbol_number n = ntokens;
271 for (symbol_number i = ntokens; i < nsyms; ++i)
272 if (bitset_test (V, i))
273 nterm_map[i - ntokens] = n++;
274 for (symbol_number i = ntokens; i < nsyms; ++i)
275 if (!bitset_test (V, i))
276 {
277 nterm_map[i - ntokens] = n++;
278 if (symbols[i]->content->status != used)
279 complain (&symbols[i]->location, Wother,
280 _("nonterminal useless in grammar: %s"),
281 symbols[i]->tag);
282 }
283 }
284
285 /* Shuffle elements of tables indexed by symbol number. */
286 {
287 symbol **symbols_sorted = xnmalloc (nnterms, sizeof *symbols_sorted);
288 for (symbol_number i = ntokens; i < nsyms; ++i)
289 symbols[i]->content->number = nterm_map[i - ntokens];
290 for (symbol_number i = ntokens; i < nsyms; ++i)
291 symbols_sorted[nterm_map[i - ntokens] - ntokens] = symbols[i];
292 for (symbol_number i = ntokens; i < nsyms; ++i)
293 symbols[i] = symbols_sorted[i - ntokens];
294 free (symbols_sorted);
295 }
296
297 /* Update nonterminal numbers in the RHS of the rules. LHS are
298 pointers to the symbol structure, they don't need renumbering. */
299 {
300 for (rule_number r = 0; r < nrules; ++r)
301 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
302 if (ISVAR (*rhsp))
303 *rhsp = symbol_number_as_item_number (nterm_map[*rhsp - ntokens]);
304 acceptsymbol->content->number = nterm_map[acceptsymbol->content->number - ntokens];
305 }
306
307 nsyms -= nuseless_nonterminals;
308 nnterms -= nuseless_nonterminals;
309 }
310
311
312 /*------------------------------------------------------------------.
313 | Output the detailed results of the reductions. For FILE.output. |
314 `------------------------------------------------------------------*/
315
316 void
reduce_output(FILE * out)317 reduce_output (FILE *out)
318 {
319 if (nuseless_nonterminals)
320 {
321 fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
322 for (int i = 0; i < nuseless_nonterminals; ++i)
323 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
324 fputs ("\n\n", out);
325 }
326
327 {
328 bool b = false;
329 for (int i = 0; i < ntokens; ++i)
330 if (reduce_token_unused_in_grammar (i))
331 {
332 if (!b)
333 fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
334 b = true;
335 fprintf (out, " %s\n", symbols[i]->tag);
336 }
337 if (b)
338 fputs ("\n\n", out);
339 }
340
341 if (nuseless_productions)
342 grammar_rules_partial_print (out, _("Rules useless in grammar"),
343 rule_useless_in_grammar_p);
344 }
345
346
347 /*-------------------------------.
348 | Report the results to STDERR. |
349 `-------------------------------*/
350
351 static void
reduce_print(void)352 reduce_print (void)
353 {
354 if (nuseless_nonterminals)
355 complain (NULL, Wother, ngettext ("%d nonterminal useless in grammar",
356 "%d nonterminals useless in grammar",
357 nuseless_nonterminals),
358 nuseless_nonterminals);
359 if (nuseless_productions)
360 complain (NULL, Wother, ngettext ("%d rule useless in grammar",
361 "%d rules useless in grammar",
362 nuseless_productions),
363 nuseless_productions);
364 }
365
366 void
reduce_grammar(void)367 reduce_grammar (void)
368 {
369 /* Allocate the global sets used to compute the reduced grammar */
370
371 N = bitset_create (nnterms, BITSET_FIXED);
372 P = bitset_create (nrules, BITSET_FIXED);
373 V = bitset_create (nsyms, BITSET_FIXED);
374 V1 = bitset_create (nsyms, BITSET_FIXED);
375
376 useless_nonterminals ();
377 inaccessable_symbols ();
378
379 /* Did we reduce something? */
380 if (nuseless_nonterminals || nuseless_productions)
381 {
382 reduce_print ();
383
384 if (!bitset_test (N, acceptsymbol->content->number - ntokens))
385 complain (&startsymbol_loc, fatal,
386 _("start symbol %s does not derive any sentence"),
387 startsymbol->tag);
388
389 /* First reduce the nonterminals, as they renumber themselves in the
390 whole grammar. If you change the order, nonterms would be
391 renumbered only in the reduced grammar. */
392 if (nuseless_nonterminals)
393 nonterminals_reduce ();
394 if (nuseless_productions)
395 reduce_grammar_tables ();
396 }
397
398 if (trace_flag & trace_grammar)
399 {
400 grammar_dump (stderr, "Reduced Grammar");
401
402 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals"
403 ", and %d productions.\n",
404 grammar_file, ntokens, nnterms, nrules);
405 }
406 }
407
408 bool
reduce_token_unused_in_grammar(symbol_number i)409 reduce_token_unused_in_grammar (symbol_number i)
410 {
411 aver (i < ntokens);
412 return !bitset_test (V, i) && !bitset_test (V1, i);
413 }
414
415 bool
reduce_nonterminal_useless_in_grammar(const sym_content * sym)416 reduce_nonterminal_useless_in_grammar (const sym_content *sym)
417 {
418 symbol_number n = sym->number;
419 aver (ntokens <= n && n < nsyms + nuseless_nonterminals);
420 return nsyms <= n;
421 }
422
423 /*-----------------------------------------------------------.
424 | Free the global sets used to compute the reduced grammar. |
425 `-----------------------------------------------------------*/
426
427 void
reduce_free(void)428 reduce_free (void)
429 {
430 bitset_free (N);
431 bitset_free (V);
432 bitset_free (V1);
433 bitset_free (P);
434 free (nterm_map);
435 nterm_map = NULL;
436 }
437