1 /* Grammar reduction for Bison.
2 
3    Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2021 Free
4    Software Foundation, Inc.
5 
6    This file is part of Bison, the GNU Compiler Compiler.
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
20 
21 
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23    nonterminals, and productions.  David S. Bakin.  */
24 
25 /* Don't eliminate unreachable terminals: They may be used by the
26    user's parser.  */
27 
28 #include <config.h>
29 #include "system.h"
30 
31 #include <bitset.h>
32 
33 #include "complain.h"
34 #include "files.h"
35 #include "getargs.h"
36 #include "gram.h"
37 #include "print-xml.h"
38 #include "reader.h"
39 #include "reduce.h"
40 #include "symtab.h"
41 
42 /* Set of nonterminals whose language is not empty.  */
43 static bitset N;
44 
45 /* Set of rules that have no useless nonterminals in their RHS.  */
46 static bitset P;
47 
48 /* Set of accessible symbols.  */
49 static bitset V;
50 
51 /* Set of symbols used to define rule precedence (so they are
52    'useless', but no warning should be issued).  */
53 static bitset V1;
54 
55 int nuseless_productions;
56 int nuseless_nonterminals;
57 
58 #define bitset_swap(Lhs, Rhs)                   \
59   do {                                          \
60     bitset lhs__ = Lhs;                         \
61     Lhs = Rhs;                                  \
62     Rhs = lhs__;                                \
63   } while (0)
64 
65 /*-------------------------------------------------------------------.
66 | Another way to do this would be with a set for each production and |
67 | then do subset tests against N0, but even for the C grammar the    |
68 | whole reducing process takes only 2 seconds on my 8Mhz AT.         |
69 `-------------------------------------------------------------------*/
70 
71 static bool
useful_production(rule_number r,bitset N0)72 useful_production (rule_number r, bitset N0)
73 {
74   /* A production is useful if all of the nonterminals in its appear
75      in the set of useful nonterminals.  */
76 
77   for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
78     if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
79       return false;
80   return true;
81 }
82 
83 
84 /*-----------------------------------------------------------------.
85 | Compute N, the set of nonterminals whose language is not empty.  |
86 |                                                                  |
87 | Remember that rules are 1-origin, symbols are 0-origin.          |
88 `-----------------------------------------------------------------*/
89 
90 static void
useless_nonterminals(void)91 useless_nonterminals (void)
92 {
93   /* N is set as built.  Np is set being built this iteration. P is
94      set of all productions which have a RHS all in N.  */
95 
96   bitset Np = bitset_create (nnterms, BITSET_FIXED);
97 
98   /* The set being computed is a set of nonterminals which can derive
99      the empty string or strings consisting of all terminals. At each
100      iteration a nonterminal is added to the set if there is a
101      production with that nonterminal as its LHS for which all the
102      nonterminals in its RHS are already in the set.  Iterate until
103      the set being computed remains unchanged.  Any nonterminals not
104      in the set at that point are useless in that they will never be
105      used in deriving a sentence of the language.
106 
107      This iteration doesn't use any special traversal over the
108      productions.  A set is kept of all productions for which all the
109      nonterminals in the RHS are in useful.  Only productions not in
110      this set are scanned on each iteration.  At the end, this set is
111      saved to be used when finding useful productions: only
112      productions in this set will appear in the final grammar.  */
113 
114   while (1)
115     {
116       bitset_copy (Np, N);
117       for (rule_number r = 0; r < nrules; ++r)
118         if (!bitset_test (P, r)
119             && useful_production (r, N))
120           {
121             bitset_set (Np, rules[r].lhs->number - ntokens);
122             bitset_set (P, r);
123           }
124       if (bitset_equal_p (N, Np))
125         break;
126       bitset_swap (N, Np);
127     }
128   bitset_free (N);
129   N = Np;
130 }
131 
132 
133 static void
inaccessable_symbols(void)134 inaccessable_symbols (void)
135 {
136   /* Find out which productions are reachable and which symbols are
137      used.  Starting with an empty set of productions and a set of
138      symbols which only has the start symbol in it, iterate over all
139      productions until the set of productions remains unchanged for an
140      iteration.  For each production which has a LHS in the set of
141      reachable symbols, add the production to the set of reachable
142      productions, and add all of the nonterminals in the RHS of the
143      production to the set of reachable symbols.
144 
145      Consider only the (partially) reduced grammar which has only
146      nonterminals in N and productions in P.
147 
148      The result is the set P of productions in the reduced grammar,
149      and the set V of symbols in the reduced grammar.
150 
151      Although this algorithm also computes the set of terminals which
152      are reachable, no terminal will be deleted from the grammar. Some
153      terminals might not be in the grammar but might be generated by
154      semantic routines, and so the user might want them available with
155      specified numbers.  (Is this true?)  However, the nonreachable
156      terminals are printed (if running in verbose mode) so that the
157      user can know.  */
158 
159   bitset Vp = bitset_create (nsyms, BITSET_FIXED);
160   bitset Pp = bitset_create (nrules, BITSET_FIXED);
161 
162   /* If the start symbol isn't useful, then nothing will be useful. */
163   if (bitset_test (N, acceptsymbol->content->number - ntokens))
164     {
165       bitset_set (V, acceptsymbol->content->number);
166 
167       while (1)
168         {
169           bitset_copy (Vp, V);
170           for (rule_number r = 0; r < nrules; ++r)
171             if (!bitset_test (Pp, r)
172                 && bitset_test (P, r)
173                 && bitset_test (V, rules[r].lhs->number))
174               {
175                 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
176                   if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
177                     bitset_set (Vp, *rhsp);
178                 bitset_set (Pp, r);
179               }
180           if (bitset_equal_p (V, Vp))
181             break;
182           bitset_swap (V, Vp);
183         }
184     }
185 
186   bitset_free (V);
187   V = Vp;
188 
189   /* These tokens (numbered 0, 1, and 2) are internal to Bison.
190      Consider them useful. */
191   bitset_set (V, eoftoken->content->number);   /* end-of-input token */
192   bitset_set (V, errtoken->content->number);   /* error token */
193   bitset_set (V, undeftoken->content->number); /* some undefined token */
194 
195   bitset_free (P);
196   P = Pp;
197 
198   int nuseful_productions = bitset_count (P);
199   nuseless_productions = nrules - nuseful_productions;
200 
201   int nuseful_nonterminals = 0;
202   for (symbol_number i = ntokens; i < nsyms; ++i)
203     nuseful_nonterminals += bitset_test (V, i);
204   nuseless_nonterminals = nnterms - nuseful_nonterminals;
205 
206   /* A token that was used in %prec should not be warned about.  */
207   for (rule_number r = 0; r < nrules; ++r)
208     if (rules[r].precsym != 0)
209       bitset_set (V1, rules[r].precsym->number);
210 }
211 
212 
213 /*-------------------------------------------------------------------.
214 | Put the useless productions at the end of RULES, and adjust NRULES |
215 | accordingly.                                                       |
216 `-------------------------------------------------------------------*/
217 
218 static void
reduce_grammar_tables(void)219 reduce_grammar_tables (void)
220 {
221   /* Report and flag useless productions.  */
222   {
223     for (rule_number r = 0; r < nrules; ++r)
224       rules[r].useful = bitset_test (P, r);
225     grammar_rules_useless_report (_("rule useless in grammar"));
226   }
227 
228   /* Map the nonterminals to their new index: useful first, useless
229      afterwards.  Kept for later report.  */
230   {
231     int useful = 0;
232     int useless = nrules - nuseless_productions;
233     rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
234     for (rule_number r = 0; r < nrules; ++r)
235       rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
236     free (rules);
237     rules = rules_sorted;
238 
239     /* Renumber the rules markers in RITEMS.  */
240     for (rule_number r = 0; r < nrules; ++r)
241       {
242         item_number *rhsp = rules[r].rhs;
243         for (/* Nothing. */; 0 <= *rhsp; ++rhsp)
244           continue;
245         *rhsp = rule_number_as_item_number (r);
246         rules[r].number = r;
247       }
248     nrules -= nuseless_productions;
249   }
250 
251   /* Adjust NRITEMS.  */
252   for (rule_number r = nrules; r < nrules + nuseless_productions; ++r)
253     nritems -= rule_rhs_length (&rules[r]) + 1;
254 }
255 
256 
257 /*------------------------------.
258 | Remove useless nonterminals.  |
259 `------------------------------*/
260 
261 symbol_number *nterm_map = NULL;
262 
263 static void
nonterminals_reduce(void)264 nonterminals_reduce (void)
265 {
266   nterm_map = xnmalloc (nnterms, sizeof *nterm_map);
267   /* Map the nonterminals to their new index: useful first, useless
268      afterwards.  Kept for later report.  */
269   {
270     symbol_number n = ntokens;
271     for (symbol_number i = ntokens; i < nsyms; ++i)
272       if (bitset_test (V, i))
273         nterm_map[i - ntokens] = n++;
274     for (symbol_number i = ntokens; i < nsyms; ++i)
275       if (!bitset_test (V, i))
276         {
277           nterm_map[i - ntokens] = n++;
278           if (symbols[i]->content->status != used)
279             complain (&symbols[i]->location, Wother,
280                       _("nonterminal useless in grammar: %s"),
281                       symbols[i]->tag);
282         }
283   }
284 
285   /* Shuffle elements of tables indexed by symbol number.  */
286   {
287     symbol **symbols_sorted = xnmalloc (nnterms, sizeof *symbols_sorted);
288     for (symbol_number i = ntokens; i < nsyms; ++i)
289       symbols[i]->content->number = nterm_map[i - ntokens];
290     for (symbol_number i = ntokens; i < nsyms; ++i)
291       symbols_sorted[nterm_map[i - ntokens] - ntokens] = symbols[i];
292     for (symbol_number i = ntokens; i < nsyms; ++i)
293       symbols[i] = symbols_sorted[i - ntokens];
294     free (symbols_sorted);
295   }
296 
297   /* Update nonterminal numbers in the RHS of the rules.  LHS are
298      pointers to the symbol structure, they don't need renumbering. */
299   {
300     for (rule_number r = 0; r < nrules; ++r)
301       for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
302         if (ISVAR (*rhsp))
303           *rhsp = symbol_number_as_item_number (nterm_map[*rhsp - ntokens]);
304     acceptsymbol->content->number = nterm_map[acceptsymbol->content->number - ntokens];
305   }
306 
307   nsyms -= nuseless_nonterminals;
308   nnterms -= nuseless_nonterminals;
309 }
310 
311 
312 /*------------------------------------------------------------------.
313 | Output the detailed results of the reductions.  For FILE.output.  |
314 `------------------------------------------------------------------*/
315 
316 void
reduce_output(FILE * out)317 reduce_output (FILE *out)
318 {
319   if (nuseless_nonterminals)
320     {
321       fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
322       for (int i = 0; i < nuseless_nonterminals; ++i)
323         fprintf (out, "    %s\n", symbols[nsyms + i]->tag);
324       fputs ("\n\n", out);
325     }
326 
327   {
328     bool b = false;
329     for (int i = 0; i < ntokens; ++i)
330       if (reduce_token_unused_in_grammar (i))
331         {
332           if (!b)
333             fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
334           b = true;
335           fprintf (out, "    %s\n", symbols[i]->tag);
336         }
337     if (b)
338       fputs ("\n\n", out);
339   }
340 
341   if (nuseless_productions)
342     grammar_rules_partial_print (out, _("Rules useless in grammar"),
343                                  rule_useless_in_grammar_p);
344 }
345 
346 
347 /*-------------------------------.
348 | Report the results to STDERR.  |
349 `-------------------------------*/
350 
351 static void
reduce_print(void)352 reduce_print (void)
353 {
354   if (nuseless_nonterminals)
355     complain (NULL, Wother, ngettext ("%d nonterminal useless in grammar",
356                                       "%d nonterminals useless in grammar",
357                                       nuseless_nonterminals),
358               nuseless_nonterminals);
359   if (nuseless_productions)
360     complain (NULL, Wother, ngettext ("%d rule useless in grammar",
361                                       "%d rules useless in grammar",
362                                       nuseless_productions),
363               nuseless_productions);
364 }
365 
366 void
reduce_grammar(void)367 reduce_grammar (void)
368 {
369   /* Allocate the global sets used to compute the reduced grammar */
370 
371   N = bitset_create (nnterms, BITSET_FIXED);
372   P =  bitset_create (nrules, BITSET_FIXED);
373   V = bitset_create (nsyms, BITSET_FIXED);
374   V1 = bitset_create (nsyms, BITSET_FIXED);
375 
376   useless_nonterminals ();
377   inaccessable_symbols ();
378 
379   /* Did we reduce something? */
380   if (nuseless_nonterminals || nuseless_productions)
381     {
382       reduce_print ();
383 
384       if (!bitset_test (N, acceptsymbol->content->number - ntokens))
385         complain (&startsymbol_loc, fatal,
386                   _("start symbol %s does not derive any sentence"),
387                   startsymbol->tag);
388 
389       /* First reduce the nonterminals, as they renumber themselves in the
390          whole grammar.  If you change the order, nonterms would be
391          renumbered only in the reduced grammar.  */
392       if (nuseless_nonterminals)
393         nonterminals_reduce ();
394       if (nuseless_productions)
395         reduce_grammar_tables ();
396     }
397 
398   if (trace_flag & trace_grammar)
399     {
400       grammar_dump (stderr, "Reduced Grammar");
401 
402       fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals"
403                ", and %d productions.\n",
404                grammar_file, ntokens, nnterms, nrules);
405     }
406 }
407 
408 bool
reduce_token_unused_in_grammar(symbol_number i)409 reduce_token_unused_in_grammar (symbol_number i)
410 {
411   aver (i < ntokens);
412   return !bitset_test (V, i) && !bitset_test (V1, i);
413 }
414 
415 bool
reduce_nonterminal_useless_in_grammar(const sym_content * sym)416 reduce_nonterminal_useless_in_grammar (const sym_content *sym)
417 {
418   symbol_number n = sym->number;
419   aver (ntokens <= n && n < nsyms + nuseless_nonterminals);
420   return nsyms <= n;
421 }
422 
423 /*-----------------------------------------------------------.
424 | Free the global sets used to compute the reduced grammar.  |
425 `-----------------------------------------------------------*/
426 
427 void
reduce_free(void)428 reduce_free (void)
429 {
430   bitset_free (N);
431   bitset_free (V);
432   bitset_free (V1);
433   bitset_free (P);
434   free (nterm_map);
435   nterm_map = NULL;
436 }
437