1 /* Compute lookahead criteria for Bison.
2
3 Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2021 Free Software
4 Foundation, Inc.
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20
21
22 /* Find which rules need lookahead in each state, and which lookahead
23 tokens they accept. */
24
25 #include <config.h>
26 #include "system.h"
27
28 #include <bitset.h>
29 #include <bitsetv.h>
30
31 #include "complain.h"
32 #include "derives.h"
33 #include "getargs.h"
34 #include "gram.h"
35 #include "lalr.h"
36 #include "lr0.h"
37 #include "muscle-tab.h"
38 #include "nullable.h"
39 #include "reader.h"
40 #include "relation.h"
41 #include "symtab.h"
42
43 /* goto_map[nterm - NTOKENS] -> number of gotos. */
44 goto_number *goto_map = NULL;
45 goto_number ngotos = 0;
46 state_number *from_state = NULL;
47 state_number *to_state = NULL;
48 bitsetv goto_follows = NULL;
49
50 /* Linked list of goto numbers. */
51 typedef struct goto_list
52 {
53 struct goto_list *next;
54 goto_number value;
55 } goto_list;
56
57 static goto_list *
goto_list_new(goto_number value,struct goto_list * next)58 goto_list_new (goto_number value, struct goto_list *next)
59 {
60 goto_list *res = xmalloc (sizeof *res);
61 res->next = next;
62 res->value = value;
63 return res;
64 }
65
66 /* LA is an nLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
67 LArule[l] is applicable in the appropriate state when the next
68 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
69 it is a conflict. */
70
71 static bitsetv LA = NULL;
72 size_t nLA;
73
74
75 /* "(p, A) includes (p', B)" iff
76 B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
77
78 Definition p.621 [DeRemer 1982].
79
80 INCLUDES[(p, A)] = [(p', B),...] */
81 static goto_number **includes;
82
83 /* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
84
85 Definition p.621 [DeRemer 1982]. */
86 static goto_list **lookback;
87
88 static void
goto_print(goto_number i,FILE * out)89 goto_print (goto_number i, FILE *out)
90 {
91 const state_number src = from_state[i];
92 const state_number dst = to_state[i];
93 symbol_number var = states[dst]->accessing_symbol;
94 fprintf (out,
95 "goto[%zu] = (%d, %s, %d)", i, src, symbols[var]->tag, dst);
96 }
97
98 void
set_goto_map(void)99 set_goto_map (void)
100 {
101 /* Count the number of gotos (ngotos) per nterm (goto_map). */
102 goto_map = xcalloc (nnterms + 1, sizeof *goto_map);
103 ngotos = 0;
104 for (state_number s = 0; s < nstates; ++s)
105 {
106 transitions *trans = states[s]->transitions;
107 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
108 {
109 ngotos++;
110 /* Abort if (ngotos + 1) would overflow. */
111 aver (ngotos != GOTO_NUMBER_MAXIMUM);
112 goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
113 }
114 }
115
116 goto_number *temp_map = xnmalloc (nnterms + 1, sizeof *temp_map);
117 {
118 goto_number k = 0;
119 for (symbol_number i = ntokens; i < nsyms; ++i)
120 {
121 temp_map[i - ntokens] = k;
122 k += goto_map[i - ntokens];
123 }
124
125 for (symbol_number i = ntokens; i < nsyms; ++i)
126 goto_map[i - ntokens] = temp_map[i - ntokens];
127
128 goto_map[nsyms - ntokens] = ngotos;
129 temp_map[nsyms - ntokens] = ngotos;
130 }
131
132 from_state = xcalloc (ngotos, sizeof *from_state);
133 to_state = xcalloc (ngotos, sizeof *to_state);
134
135 for (state_number s = 0; s < nstates; ++s)
136 {
137 const transitions *trans = states[s]->transitions;
138 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
139 {
140 goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
141 from_state[k] = s;
142 to_state[k] = trans->states[i]->number;
143 }
144 }
145
146 free (temp_map);
147
148 if (trace_flag & trace_automaton)
149 for (int i = 0; i < ngotos; ++i)
150 {
151 goto_print (i, stderr);
152 fputc ('\n', stderr);
153 }
154 }
155
156
157 goto_number
map_goto(state_number src,symbol_number sym)158 map_goto (state_number src, symbol_number sym)
159 {
160 goto_number low = goto_map[sym - ntokens];
161 goto_number high = goto_map[sym - ntokens + 1] - 1;
162
163 for (;;)
164 {
165 aver (low <= high);
166 goto_number middle = (low + high) / 2;
167 state_number s = from_state[middle];
168 if (s == src)
169 return middle;
170 else if (s < src)
171 low = middle + 1;
172 else
173 high = middle - 1;
174 }
175 }
176
177 /* Print FOLLOWS for debugging. */
178 static void
follows_print(const char * title,FILE * out)179 follows_print (const char* title, FILE *out)
180 {
181 fprintf (out, "%s:\n", title);
182 for (goto_number i = 0; i < ngotos; ++i)
183 {
184 fputs (" FOLLOWS[", out);
185 goto_print (i, out);
186 fputs ("] =", out);
187 bitset_iterator iter;
188 symbol_number sym;
189 BITSET_FOR_EACH (iter, goto_follows[i], sym, 0)
190 fprintf (out, " %s", symbols[sym]->tag);
191 fputc ('\n', out);
192 }
193 fputc ('\n', out);
194 }
195
196 /* Build goto_follows. */
197 static void
initialize_goto_follows(void)198 initialize_goto_follows (void)
199 {
200 goto_number **reads = xnmalloc (ngotos, sizeof *reads);
201 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
202
203 goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
204
205 for (goto_number i = 0; i < ngotos; ++i)
206 {
207 state_number dst = to_state[i];
208 const transitions *trans = states[dst]->transitions;
209
210 int j;
211 FOR_EACH_SHIFT (trans, j)
212 bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j));
213
214 /* Gotos outgoing from DST. */
215 goto_number nedges = 0;
216 for (; j < trans->num; ++j)
217 {
218 symbol_number sym = TRANSITION_SYMBOL (trans, j);
219 if (nullable[sym - ntokens])
220 {
221 assert (nedges < ngotos);
222 edge[nedges++] = map_goto (dst, sym);
223 }
224 }
225
226 if (nedges == 0)
227 reads[i] = NULL;
228 else
229 {
230 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
231 memcpy (reads[i], edge, nedges * sizeof edge[0]);
232 reads[i][nedges] = END_NODE;
233 }
234 }
235 if (trace_flag & trace_automaton)
236 {
237 follows_print ("follows after shifts", stderr);
238 relation_print ("reads", reads, ngotos, goto_print, stderr);
239 }
240
241 relation_digraph (reads, ngotos, goto_follows);
242 if (trace_flag & trace_automaton)
243 follows_print ("follows after read", stderr);
244
245 for (goto_number i = 0; i < ngotos; ++i)
246 free (reads[i]);
247 free (reads);
248 free (edge);
249 }
250
251
252 /* Find the state which LOOKBACK[LOOKBACK_INDEX] is about. */
253 static const state *
lookback_find_state(int lookback_index)254 lookback_find_state (int lookback_index)
255 {
256 state *res = NULL;
257 for (int j = 0; j < nstates; ++j)
258 if (states[j]->reductions
259 && states[j]->reductions->lookaheads)
260 {
261 if (states[j]->reductions->lookaheads - LA > lookback_index)
262 /* Went too far. */
263 break;
264 else
265 res = states[j];
266 }
267 /* Pacify "potential null pointer dereference" warning. */
268 if (!res)
269 abort ();
270 return res;
271 }
272
273 /* Print LOOKBACK for debugging. */
274 static void
lookback_print(FILE * out)275 lookback_print (FILE *out)
276 {
277 fputs ("lookback:\n", out);
278 for (int i = 0; i < nLA; ++i)
279 if (lookback[i])
280 {
281 fprintf (out, " %3d = ", i);
282 const state *s = lookback_find_state (i);
283 int rnum = i - (s->reductions->lookaheads - LA);
284 const rule *r = s->reductions->rules[rnum];
285 fprintf (out, "(%3d, ", s->number);
286 rule_print (r, NULL, out);
287 fputs (") ->", out);
288 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
289 {
290 fputc (' ', out);
291 goto_print (sp->value, out);
292 }
293 fputc ('\n', out);
294 }
295 fputc ('\n', out);
296 }
297
298 /* Add (S, R) -> GOTONO to LOOKBACK.
299
300 "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
301
302 The goto number GOTONO, whose source is S (which is
303 inconsistent), */
304 static void
add_lookback_edge(state * s,rule const * r,goto_number gotono)305 add_lookback_edge (state *s, rule const *r, goto_number gotono)
306 {
307 int ri = state_reduction_find (s, r);
308 int idx = (s->reductions->lookaheads - LA) + ri;
309 lookback[idx] = goto_list_new (gotono, lookback[idx]);
310 }
311
312
313 /* Compute INCLUDES and LOOKBACK. Corresponds to step E in Sec. 6 of
314 [DeRemer 1982]. */
315 static void
build_relations(void)316 build_relations (void)
317 {
318 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
319 state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path);
320
321 includes = xnmalloc (ngotos, sizeof *includes);
322
323 /* For each goto (from SRC to DST labeled by nterm VAR), iterate
324 over each rule with VAR as LHS, and find the path PATH from SRC
325 labeled with the RHS of the rule. */
326 for (goto_number i = 0; i < ngotos; ++i)
327 {
328 const state_number src = from_state[i];
329 const state_number dst = to_state[i];
330 symbol_number var = states[dst]->accessing_symbol;
331
332 /* Size of EDGE. */
333 int nedges = 0;
334 for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep)
335 {
336 rule const *r = *rulep;
337 state *s = states[src];
338 path[0] = s->number;
339
340 /* Length of PATH. */
341 int length = 1;
342 for (item_number const *rp = r->rhs; 0 <= *rp; rp++)
343 {
344 symbol_number sym = item_number_as_symbol_number (*rp);
345 s = transitions_to (s, sym);
346 path[length++] = s->number;
347 }
348
349 /* S is the end of PATH. */
350 if (!s->consistent)
351 add_lookback_edge (s, r, i);
352
353 /* Walk back PATH from penultimate to beginning.
354
355 The "0 <= p" part is actually useless: each rhs ends in a
356 rule number (for which ISVAR(...) is false), and there is
357 a sentinel (ritem[-1]=0) before the first rhs. */
358 for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p)
359 {
360 symbol_number sym = item_number_as_symbol_number (r->rhs[p]);
361 goto_number g = map_goto (path[p], sym);
362 /* Insert G if not already in EDGE.
363 FIXME: linear search. A bitset instead? */
364 {
365 bool found = false;
366 for (int j = 0; !found && j < nedges; ++j)
367 found = edge[j] == g;
368 if (!found)
369 {
370 assert (nedges < ngotos);
371 edge[nedges++] = g;
372 }
373 }
374 if (!nullable[sym - ntokens])
375 break;
376 }
377 }
378
379 if (trace_flag & trace_automaton)
380 {
381 goto_print (i, stderr);
382 fputs (" edges = ", stderr);
383 for (int j = 0; j < nedges; ++j)
384 {
385 fputc (' ', stderr);
386 goto_print (edge[j], stderr);
387 }
388 fputc ('\n', stderr);
389 }
390
391 if (nedges == 0)
392 includes[i] = NULL;
393 else
394 {
395 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
396 for (int j = 0; j < nedges; ++j)
397 includes[i][j] = edge[j];
398 includes[i][nedges] = END_NODE;
399 }
400 }
401
402 free (edge);
403 free (path);
404
405 relation_transpose (&includes, ngotos);
406 if (trace_flag & trace_automaton)
407 relation_print ("includes", includes, ngotos, goto_print, stderr);
408 }
409
410 /* Compute FOLLOWS from INCLUDES, and free INCLUDES. */
411 static void
compute_follows(void)412 compute_follows (void)
413 {
414 relation_digraph (includes, ngotos, goto_follows);
415 if (trace_flag & trace_sets)
416 follows_print ("follows after includes", stderr);
417 for (goto_number i = 0; i < ngotos; ++i)
418 free (includes[i]);
419 free (includes);
420 }
421
422
423 static void
compute_lookaheads(void)424 compute_lookaheads (void)
425 {
426 if (trace_flag & trace_automaton)
427 lookback_print (stderr);
428
429 for (size_t i = 0; i < nLA; ++i)
430 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
431 bitset_or (LA[i], LA[i], goto_follows[sp->value]);
432
433 /* Free LOOKBACK. */
434 for (size_t i = 0; i < nLA; ++i)
435 LIST_FREE (goto_list, lookback[i]);
436 free (lookback);
437 }
438
439
440 /*------------------------------------------------------.
441 | Count the number of lookahead tokens required for S. |
442 `------------------------------------------------------*/
443
444 static int
state_lookaheads_count(state * s,bool default_reduction_only_for_accept)445 state_lookaheads_count (state *s, bool default_reduction_only_for_accept)
446 {
447 const reductions *reds = s->reductions;
448 const transitions *trans = s->transitions;
449
450 /* Transitions are only disabled during conflict resolution, and that
451 hasn't happened yet, so there should be no need to check that
452 transition 0 hasn't been disabled before checking if it is a shift.
453 However, this check was performed at one time, so we leave it as an
454 aver. */
455 aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0));
456
457 /* We need a lookahead either to distinguish different reductions
458 (i.e., there are two or more), or to distinguish a reduction from a
459 shift. Otherwise, it is straightforward, and the state is
460 'consistent'. However, do not treat a state with any reductions as
461 consistent unless it is the accepting state (because there is never
462 a lookahead token that makes sense there, and so no lookahead token
463 should be read) if the user has otherwise disabled default
464 reductions. */
465 s->consistent =
466 !(reds->num > 1
467 || (reds->num == 1 && trans->num && TRANSITION_IS_SHIFT (trans, 0))
468 || (reds->num == 1 && reds->rules[0]->number != 0
469 && default_reduction_only_for_accept));
470
471 return s->consistent ? 0 : reds->num;
472 }
473
474
475 /*----------------------------------------------.
476 | Compute LA, NLA, and the lookaheads members. |
477 `----------------------------------------------*/
478
479 void
initialize_LA(void)480 initialize_LA (void)
481 {
482 bool default_reduction_only_for_accept;
483 {
484 char *default_reductions =
485 muscle_percent_define_get ("lr.default-reduction");
486 default_reduction_only_for_accept = STREQ (default_reductions, "accepting");
487 free (default_reductions);
488 }
489
490 /* Compute the total number of reductions requiring a lookahead. */
491 nLA = 0;
492 for (state_number i = 0; i < nstates; ++i)
493 nLA += state_lookaheads_count (states[i],
494 default_reduction_only_for_accept);
495 /* Avoid having to special case 0. */
496 if (!nLA)
497 nLA = 1;
498
499 bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
500
501 /* Initialize the members LOOKAHEADS for each state whose reductions
502 require lookahead tokens. */
503 for (state_number i = 0; i < nstates; ++i)
504 {
505 int count = state_lookaheads_count (states[i],
506 default_reduction_only_for_accept);
507 if (count)
508 {
509 states[i]->reductions->lookaheads = pLA;
510 pLA += count;
511 }
512 }
513 }
514
515
516 /*---------------------------------------------.
517 | Output the lookahead tokens for each state. |
518 `---------------------------------------------*/
519
520 static void
lookaheads_print(FILE * out)521 lookaheads_print (FILE *out)
522 {
523 fputs ("Lookaheads:\n", out);
524 for (state_number i = 0; i < nstates; ++i)
525 {
526 const reductions *reds = states[i]->reductions;
527 if (reds->num)
528 {
529 fprintf (out, " State %d:\n", i);
530 for (int j = 0; j < reds->num; ++j)
531 {
532 fprintf (out, " rule %d:", reds->rules[j]->number);
533 if (reds->lookaheads)
534 {
535 bitset_iterator iter;
536 int k;
537 BITSET_FOR_EACH (iter, reds->lookaheads[j], k, 0)
538 fprintf (out, " %s", symbols[k]->tag);
539 }
540 fputc ('\n', out);
541 }
542 }
543 }
544 fputc ('\n', out);
545 }
546
547 void
lalr(void)548 lalr (void)
549 {
550 if (trace_flag & trace_automaton)
551 {
552 fputc ('\n', stderr);
553 begin_use_class ("trace0", stderr);
554 fprintf (stderr, "lalr: begin");
555 end_use_class ("trace0", stderr);
556 fputc ('\n', stderr);
557 }
558 initialize_LA ();
559 set_goto_map ();
560 initialize_goto_follows ();
561 lookback = xcalloc (nLA, sizeof *lookback);
562 build_relations ();
563 compute_follows ();
564 compute_lookaheads ();
565
566 if (trace_flag & trace_sets)
567 lookaheads_print (stderr);
568 if (trace_flag & trace_automaton)
569 {
570 begin_use_class ("trace0", stderr);
571 fprintf (stderr, "lalr: done");
572 end_use_class ("trace0", stderr);
573 fputc ('\n', stderr);
574 }
575 }
576
577
578 void
lalr_update_state_numbers(state_number old_to_new[],state_number nstates_old)579 lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
580 {
581 goto_number ngotos_reachable = 0;
582 symbol_number nonterminal = 0;
583 aver (nsyms == nnterms + ntokens);
584
585 for (goto_number i = 0; i < ngotos; ++i)
586 {
587 while (i == goto_map[nonterminal])
588 goto_map[nonterminal++] = ngotos_reachable;
589 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
590 entry. */
591 if (old_to_new[from_state[i]] != nstates_old)
592 {
593 /* from_state[i] is not removed, so it and thus to_state[i] are
594 reachable, so to_state[i] != nstates_old. */
595 aver (old_to_new[to_state[i]] != nstates_old);
596 from_state[ngotos_reachable] = old_to_new[from_state[i]];
597 to_state[ngotos_reachable] = old_to_new[to_state[i]];
598 ++ngotos_reachable;
599 }
600 }
601 while (nonterminal <= nnterms)
602 {
603 aver (ngotos == goto_map[nonterminal]);
604 goto_map[nonterminal++] = ngotos_reachable;
605 }
606 ngotos = ngotos_reachable;
607 }
608
609
610 void
lalr_free(void)611 lalr_free (void)
612 {
613 for (state_number s = 0; s < nstates; ++s)
614 states[s]->reductions->lookaheads = NULL;
615 bitsetv_free (LA);
616 }
617