1 /* Compute lookahead criteria for Bison.
2 
3    Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2021 Free Software
4    Foundation, Inc.
5 
6    This file is part of Bison, the GNU Compiler Compiler.
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
20 
21 
22 /* Find which rules need lookahead in each state, and which lookahead
23    tokens they accept.  */
24 
25 #include <config.h>
26 #include "system.h"
27 
28 #include <bitset.h>
29 #include <bitsetv.h>
30 
31 #include "complain.h"
32 #include "derives.h"
33 #include "getargs.h"
34 #include "gram.h"
35 #include "lalr.h"
36 #include "lr0.h"
37 #include "muscle-tab.h"
38 #include "nullable.h"
39 #include "reader.h"
40 #include "relation.h"
41 #include "symtab.h"
42 
43 /* goto_map[nterm - NTOKENS] -> number of gotos.  */
44 goto_number *goto_map = NULL;
45 goto_number ngotos = 0;
46 state_number *from_state = NULL;
47 state_number *to_state = NULL;
48 bitsetv goto_follows = NULL;
49 
50 /* Linked list of goto numbers.  */
51 typedef struct goto_list
52 {
53   struct goto_list *next;
54   goto_number value;
55 } goto_list;
56 
57 static goto_list *
goto_list_new(goto_number value,struct goto_list * next)58 goto_list_new (goto_number value, struct goto_list *next)
59 {
60   goto_list *res = xmalloc (sizeof *res);
61   res->next = next;
62   res->value = value;
63   return res;
64 }
65 
66 /* LA is an nLA by NTOKENS matrix of bits.  LA[l, i] is 1 if the rule
67    LArule[l] is applicable in the appropriate state when the next
68    token is symbol i.  If LA[l, i] and LA[l, j] are both 1 for i != j,
69    it is a conflict.  */
70 
71 static bitsetv LA = NULL;
72 size_t nLA;
73 
74 
75 /* "(p, A) includes (p', B)" iff
76    B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
77 
78    Definition p.621 [DeRemer 1982].
79 
80    INCLUDES[(p, A)] = [(p', B),...] */
81 static goto_number **includes;
82 
83 /* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
84 
85    Definition p.621 [DeRemer 1982]. */
86 static goto_list **lookback;
87 
88 static void
goto_print(goto_number i,FILE * out)89 goto_print (goto_number i, FILE *out)
90 {
91   const state_number src = from_state[i];
92   const state_number dst = to_state[i];
93   symbol_number var = states[dst]->accessing_symbol;
94   fprintf (out,
95            "goto[%zu] = (%d, %s, %d)", i, src, symbols[var]->tag, dst);
96 }
97 
98 void
set_goto_map(void)99 set_goto_map (void)
100 {
101   /* Count the number of gotos (ngotos) per nterm (goto_map). */
102   goto_map = xcalloc (nnterms + 1, sizeof *goto_map);
103   ngotos = 0;
104   for (state_number s = 0; s < nstates; ++s)
105     {
106       transitions *trans = states[s]->transitions;
107       for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
108         {
109           ngotos++;
110           /* Abort if (ngotos + 1) would overflow.  */
111           aver (ngotos != GOTO_NUMBER_MAXIMUM);
112           goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
113         }
114     }
115 
116   goto_number *temp_map = xnmalloc (nnterms + 1, sizeof *temp_map);
117   {
118     goto_number k = 0;
119     for (symbol_number i = ntokens; i < nsyms; ++i)
120       {
121         temp_map[i - ntokens] = k;
122         k += goto_map[i - ntokens];
123       }
124 
125     for (symbol_number i = ntokens; i < nsyms; ++i)
126       goto_map[i - ntokens] = temp_map[i - ntokens];
127 
128     goto_map[nsyms - ntokens] = ngotos;
129     temp_map[nsyms - ntokens] = ngotos;
130   }
131 
132   from_state = xcalloc (ngotos, sizeof *from_state);
133   to_state = xcalloc (ngotos, sizeof *to_state);
134 
135   for (state_number s = 0; s < nstates; ++s)
136     {
137       const transitions *trans = states[s]->transitions;
138       for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
139         {
140           goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
141           from_state[k] = s;
142           to_state[k] = trans->states[i]->number;
143         }
144     }
145 
146   free (temp_map);
147 
148   if (trace_flag & trace_automaton)
149     for (int i = 0; i < ngotos; ++i)
150       {
151         goto_print (i, stderr);
152         fputc ('\n', stderr);
153       }
154 }
155 
156 
157 goto_number
map_goto(state_number src,symbol_number sym)158 map_goto (state_number src, symbol_number sym)
159 {
160   goto_number low = goto_map[sym - ntokens];
161   goto_number high = goto_map[sym - ntokens + 1] - 1;
162 
163   for (;;)
164     {
165       aver (low <= high);
166       goto_number middle = (low + high) / 2;
167       state_number s = from_state[middle];
168       if (s == src)
169         return middle;
170       else if (s < src)
171         low = middle + 1;
172       else
173         high = middle - 1;
174     }
175 }
176 
177 /* Print FOLLOWS for debugging.  */
178 static void
follows_print(const char * title,FILE * out)179 follows_print (const char* title, FILE *out)
180 {
181   fprintf (out, "%s:\n", title);
182   for (goto_number i = 0; i < ngotos; ++i)
183     {
184       fputs ("    FOLLOWS[", out);
185       goto_print (i, out);
186       fputs ("] =", out);
187       bitset_iterator iter;
188       symbol_number sym;
189       BITSET_FOR_EACH (iter, goto_follows[i], sym, 0)
190         fprintf (out, " %s", symbols[sym]->tag);
191       fputc ('\n', out);
192     }
193   fputc ('\n', out);
194 }
195 
196 /* Build goto_follows. */
197 static void
initialize_goto_follows(void)198 initialize_goto_follows (void)
199 {
200   goto_number **reads = xnmalloc (ngotos, sizeof *reads);
201   goto_number *edge = xnmalloc (ngotos, sizeof *edge);
202 
203   goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
204 
205   for (goto_number i = 0; i < ngotos; ++i)
206     {
207       state_number dst = to_state[i];
208       const transitions *trans = states[dst]->transitions;
209 
210       int j;
211       FOR_EACH_SHIFT (trans, j)
212         bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j));
213 
214       /* Gotos outgoing from DST. */
215       goto_number nedges = 0;
216       for (; j < trans->num; ++j)
217         {
218           symbol_number sym = TRANSITION_SYMBOL (trans, j);
219           if (nullable[sym - ntokens])
220             {
221               assert (nedges < ngotos);
222               edge[nedges++] = map_goto (dst, sym);
223             }
224         }
225 
226       if (nedges == 0)
227         reads[i] = NULL;
228       else
229         {
230           reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
231           memcpy (reads[i], edge, nedges * sizeof edge[0]);
232           reads[i][nedges] = END_NODE;
233         }
234     }
235   if (trace_flag & trace_automaton)
236     {
237       follows_print ("follows after shifts", stderr);
238       relation_print ("reads", reads, ngotos, goto_print, stderr);
239     }
240 
241   relation_digraph (reads, ngotos, goto_follows);
242   if (trace_flag & trace_automaton)
243     follows_print ("follows after read", stderr);
244 
245   for (goto_number i = 0; i < ngotos; ++i)
246     free (reads[i]);
247   free (reads);
248   free (edge);
249 }
250 
251 
252 /* Find the state which LOOKBACK[LOOKBACK_INDEX] is about.  */
253 static const state *
lookback_find_state(int lookback_index)254 lookback_find_state (int lookback_index)
255 {
256   state *res = NULL;
257   for (int j = 0; j < nstates; ++j)
258     if (states[j]->reductions
259         && states[j]->reductions->lookaheads)
260       {
261         if (states[j]->reductions->lookaheads - LA > lookback_index)
262           /* Went too far. */
263           break;
264         else
265           res = states[j];
266       }
267   /* Pacify "potential null pointer dereference" warning.  */
268   if (!res)
269     abort ();
270   return res;
271 }
272 
273 /* Print LOOKBACK for debugging.  */
274 static void
lookback_print(FILE * out)275 lookback_print (FILE *out)
276 {
277   fputs ("lookback:\n", out);
278   for (int i = 0; i < nLA; ++i)
279     if (lookback[i])
280     {
281       fprintf (out, "   %3d = ", i);
282       const state *s = lookback_find_state (i);
283       int rnum = i - (s->reductions->lookaheads - LA);
284       const rule *r = s->reductions->rules[rnum];
285       fprintf (out, "(%3d, ", s->number);
286       rule_print (r, NULL, out);
287       fputs (") ->", out);
288       for (goto_list *sp = lookback[i]; sp; sp = sp->next)
289         {
290           fputc (' ', out);
291           goto_print (sp->value, out);
292         }
293       fputc ('\n', out);
294     }
295   fputc ('\n', out);
296 }
297 
298 /* Add (S, R) -> GOTONO to LOOKBACK.
299 
300    "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
301 
302    The goto number GOTONO, whose source is S (which is
303    inconsistent), */
304 static void
add_lookback_edge(state * s,rule const * r,goto_number gotono)305 add_lookback_edge (state *s, rule const *r, goto_number gotono)
306 {
307   int ri = state_reduction_find (s, r);
308   int idx = (s->reductions->lookaheads - LA) + ri;
309   lookback[idx] = goto_list_new (gotono, lookback[idx]);
310 }
311 
312 
313 /* Compute INCLUDES and LOOKBACK.  Corresponds to step E in Sec. 6 of
314    [DeRemer 1982].  */
315 static void
build_relations(void)316 build_relations (void)
317 {
318   goto_number *edge = xnmalloc (ngotos, sizeof *edge);
319   state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path);
320 
321   includes = xnmalloc (ngotos, sizeof *includes);
322 
323   /* For each goto (from SRC to DST labeled by nterm VAR), iterate
324      over each rule with VAR as LHS, and find the path PATH from SRC
325      labeled with the RHS of the rule. */
326   for (goto_number i = 0; i < ngotos; ++i)
327     {
328       const state_number src = from_state[i];
329       const state_number dst = to_state[i];
330       symbol_number var = states[dst]->accessing_symbol;
331 
332       /* Size of EDGE.  */
333       int nedges = 0;
334       for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep)
335         {
336           rule const *r = *rulep;
337           state *s = states[src];
338           path[0] = s->number;
339 
340           /* Length of PATH.  */
341           int length = 1;
342           for (item_number const *rp = r->rhs; 0 <= *rp; rp++)
343             {
344               symbol_number sym = item_number_as_symbol_number (*rp);
345               s = transitions_to (s, sym);
346               path[length++] = s->number;
347             }
348 
349           /* S is the end of PATH.  */
350           if (!s->consistent)
351             add_lookback_edge (s, r, i);
352 
353           /* Walk back PATH from penultimate to beginning.
354 
355              The "0 <= p" part is actually useless: each rhs ends in a
356              rule number (for which ISVAR(...) is false), and there is
357              a sentinel (ritem[-1]=0) before the first rhs.  */
358           for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p)
359             {
360               symbol_number sym = item_number_as_symbol_number (r->rhs[p]);
361               goto_number g = map_goto (path[p], sym);
362               /* Insert G if not already in EDGE.
363                  FIXME: linear search.  A bitset instead?  */
364               {
365                 bool found = false;
366                 for (int j = 0; !found && j < nedges; ++j)
367                   found = edge[j] == g;
368                 if (!found)
369                   {
370                     assert (nedges < ngotos);
371                     edge[nedges++] = g;
372                   }
373               }
374               if (!nullable[sym - ntokens])
375                 break;
376             }
377         }
378 
379       if (trace_flag & trace_automaton)
380         {
381           goto_print (i, stderr);
382           fputs (" edges = ", stderr);
383           for (int j = 0; j < nedges; ++j)
384             {
385               fputc (' ', stderr);
386               goto_print (edge[j], stderr);
387             }
388           fputc ('\n', stderr);
389         }
390 
391       if (nedges == 0)
392         includes[i] = NULL;
393       else
394         {
395           includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
396           for (int j = 0; j < nedges; ++j)
397             includes[i][j] = edge[j];
398           includes[i][nedges] = END_NODE;
399         }
400     }
401 
402   free (edge);
403   free (path);
404 
405   relation_transpose (&includes, ngotos);
406   if (trace_flag & trace_automaton)
407     relation_print ("includes", includes, ngotos, goto_print, stderr);
408 }
409 
410 /* Compute FOLLOWS from INCLUDES, and free INCLUDES.  */
411 static void
compute_follows(void)412 compute_follows (void)
413 {
414   relation_digraph (includes, ngotos, goto_follows);
415   if (trace_flag & trace_sets)
416     follows_print ("follows after includes", stderr);
417   for (goto_number i = 0; i < ngotos; ++i)
418     free (includes[i]);
419   free (includes);
420 }
421 
422 
423 static void
compute_lookaheads(void)424 compute_lookaheads (void)
425 {
426   if (trace_flag & trace_automaton)
427       lookback_print (stderr);
428 
429   for (size_t i = 0; i < nLA; ++i)
430     for (goto_list *sp = lookback[i]; sp; sp = sp->next)
431       bitset_or (LA[i], LA[i], goto_follows[sp->value]);
432 
433   /* Free LOOKBACK. */
434   for (size_t i = 0; i < nLA; ++i)
435     LIST_FREE (goto_list, lookback[i]);
436   free (lookback);
437 }
438 
439 
440 /*------------------------------------------------------.
441 | Count the number of lookahead tokens required for S.  |
442 `------------------------------------------------------*/
443 
444 static int
state_lookaheads_count(state * s,bool default_reduction_only_for_accept)445 state_lookaheads_count (state *s, bool default_reduction_only_for_accept)
446 {
447   const reductions *reds = s->reductions;
448   const transitions *trans = s->transitions;
449 
450   /* Transitions are only disabled during conflict resolution, and that
451      hasn't happened yet, so there should be no need to check that
452      transition 0 hasn't been disabled before checking if it is a shift.
453      However, this check was performed at one time, so we leave it as an
454      aver.  */
455   aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0));
456 
457   /* We need a lookahead either to distinguish different reductions
458      (i.e., there are two or more), or to distinguish a reduction from a
459      shift.  Otherwise, it is straightforward, and the state is
460      'consistent'.  However, do not treat a state with any reductions as
461      consistent unless it is the accepting state (because there is never
462      a lookahead token that makes sense there, and so no lookahead token
463      should be read) if the user has otherwise disabled default
464      reductions.  */
465   s->consistent =
466     !(reds->num > 1
467       || (reds->num == 1 && trans->num && TRANSITION_IS_SHIFT (trans, 0))
468       || (reds->num == 1 && reds->rules[0]->number != 0
469           && default_reduction_only_for_accept));
470 
471   return s->consistent ? 0 : reds->num;
472 }
473 
474 
475 /*----------------------------------------------.
476 | Compute LA, NLA, and the lookaheads members.  |
477 `----------------------------------------------*/
478 
479 void
initialize_LA(void)480 initialize_LA (void)
481 {
482   bool default_reduction_only_for_accept;
483   {
484     char *default_reductions =
485       muscle_percent_define_get ("lr.default-reduction");
486     default_reduction_only_for_accept = STREQ (default_reductions, "accepting");
487     free (default_reductions);
488   }
489 
490   /* Compute the total number of reductions requiring a lookahead.  */
491   nLA = 0;
492   for (state_number i = 0; i < nstates; ++i)
493     nLA += state_lookaheads_count (states[i],
494                                    default_reduction_only_for_accept);
495   /* Avoid having to special case 0.  */
496   if (!nLA)
497     nLA = 1;
498 
499   bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
500 
501   /* Initialize the members LOOKAHEADS for each state whose reductions
502      require lookahead tokens.  */
503   for (state_number i = 0; i < nstates; ++i)
504     {
505       int count = state_lookaheads_count (states[i],
506                                           default_reduction_only_for_accept);
507       if (count)
508         {
509           states[i]->reductions->lookaheads = pLA;
510           pLA += count;
511         }
512     }
513 }
514 
515 
516 /*---------------------------------------------.
517 | Output the lookahead tokens for each state.  |
518 `---------------------------------------------*/
519 
520 static void
lookaheads_print(FILE * out)521 lookaheads_print (FILE *out)
522 {
523   fputs ("Lookaheads:\n", out);
524   for (state_number i = 0; i < nstates; ++i)
525     {
526       const reductions *reds = states[i]->reductions;
527       if (reds->num)
528         {
529           fprintf (out, "  State %d:\n", i);
530           for (int j = 0; j < reds->num; ++j)
531             {
532               fprintf (out, "    rule %d:", reds->rules[j]->number);
533               if (reds->lookaheads)
534               {
535                 bitset_iterator iter;
536                 int k;
537                 BITSET_FOR_EACH (iter, reds->lookaheads[j], k, 0)
538                   fprintf (out, " %s", symbols[k]->tag);
539               }
540               fputc ('\n', out);
541             }
542         }
543     }
544   fputc ('\n', out);
545 }
546 
547 void
lalr(void)548 lalr (void)
549 {
550   if (trace_flag & trace_automaton)
551     {
552       fputc ('\n', stderr);
553       begin_use_class ("trace0", stderr);
554       fprintf (stderr, "lalr: begin");
555       end_use_class ("trace0", stderr);
556       fputc ('\n', stderr);
557     }
558   initialize_LA ();
559   set_goto_map ();
560   initialize_goto_follows ();
561   lookback = xcalloc (nLA, sizeof *lookback);
562   build_relations ();
563   compute_follows ();
564   compute_lookaheads ();
565 
566   if (trace_flag & trace_sets)
567     lookaheads_print (stderr);
568   if (trace_flag & trace_automaton)
569     {
570       begin_use_class ("trace0", stderr);
571       fprintf (stderr, "lalr: done");
572       end_use_class ("trace0", stderr);
573       fputc ('\n', stderr);
574     }
575 }
576 
577 
578 void
lalr_update_state_numbers(state_number old_to_new[],state_number nstates_old)579 lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
580 {
581   goto_number ngotos_reachable = 0;
582   symbol_number nonterminal = 0;
583   aver (nsyms == nnterms + ntokens);
584 
585   for (goto_number i = 0; i < ngotos; ++i)
586     {
587       while (i == goto_map[nonterminal])
588         goto_map[nonterminal++] = ngotos_reachable;
589       /* If old_to_new[from_state[i]] = nstates_old, remove this goto
590          entry.  */
591       if (old_to_new[from_state[i]] != nstates_old)
592         {
593           /* from_state[i] is not removed, so it and thus to_state[i] are
594              reachable, so to_state[i] != nstates_old.  */
595           aver (old_to_new[to_state[i]] != nstates_old);
596           from_state[ngotos_reachable] = old_to_new[from_state[i]];
597           to_state[ngotos_reachable] = old_to_new[to_state[i]];
598           ++ngotos_reachable;
599         }
600     }
601   while (nonterminal <= nnterms)
602     {
603       aver (ngotos == goto_map[nonterminal]);
604       goto_map[nonterminal++] = ngotos_reachable;
605     }
606   ngotos = ngotos_reachable;
607 }
608 
609 
610 void
lalr_free(void)611 lalr_free (void)
612 {
613   for (state_number s = 0; s < nstates; ++s)
614     states[s]->reductions->lookaheads = NULL;
615   bitsetv_free (LA);
616 }
617