xref: /openbsd/usr.bin/yacc/lalr.c (revision cca36db2)
1 /*	$OpenBSD: lalr.c,v 1.11 2012/03/03 19:15:00 nicm Exp $	*/
2 /*	$NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Robert Paul Corbett.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include "defs.h"
37 
38 typedef
39   struct shorts
40     {
41       struct shorts *next;
42       short value;
43     }
44   shorts;
45 
46 int tokensetsize;
47 short *lookaheads;
48 short *LAruleno;
49 unsigned *LA;
50 short *accessing_symbol;
51 core **state_table;
52 shifts **shift_table;
53 reductions **reduction_table;
54 short *goto_map;
55 short *from_state;
56 short *to_state;
57 
58 short **transpose();
59 void set_state_table(void);
60 void set_accessing_symbol(void);
61 void set_shift_table(void);
62 void set_reduction_table(void);
63 void set_maxrhs(void);
64 void initialize_LA(void);
65 void set_goto_map(void);
66 void initialize_F(void);
67 void build_relations(void);
68 void compute_FOLLOWS(void);
69 void compute_lookaheads(void);
70 int map_goto(int, int);
71 void digraph(short **);
72 void add_lookback_edge(int, int, int);
73 void traverse(int);
74 
75 static int infinity;
76 static int maxrhs;
77 static int ngotos;
78 static unsigned *F;
79 static short **includes;
80 static shorts **lookback;
81 static short **R;
82 static short *INDEX;
83 static short *VERTICES;
84 static int top;
85 
86 void
87 lalr(void)
88 {
89     tokensetsize = WORDSIZE(ntokens);
90 
91     set_state_table();
92     set_accessing_symbol();
93     set_shift_table();
94     set_reduction_table();
95     set_maxrhs();
96     initialize_LA();
97     set_goto_map();
98     initialize_F();
99     build_relations();
100     compute_FOLLOWS();
101     compute_lookaheads();
102     free_derives();
103     free_nullable();
104 }
105 
106 
107 void
108 set_state_table(void)
109 {
110     core *sp;
111 
112     state_table = NEW2(nstates, core *);
113     for (sp = first_state; sp; sp = sp->next)
114 	state_table[sp->number] = sp;
115 }
116 
117 
118 void
119 set_accessing_symbol(void)
120 {
121     core *sp;
122 
123     accessing_symbol = NEW2(nstates, short);
124     for (sp = first_state; sp; sp = sp->next)
125 	accessing_symbol[sp->number] = sp->accessing_symbol;
126 }
127 
128 
129 void
130 set_shift_table(void)
131 {
132     shifts *sp;
133 
134     shift_table = NEW2(nstates, shifts *);
135     for (sp = first_shift; sp; sp = sp->next)
136 	shift_table[sp->number] = sp;
137 }
138 
139 
140 void
141 set_reduction_table(void)
142 {
143     reductions *rp;
144 
145     reduction_table = NEW2(nstates, reductions *);
146     for (rp = first_reduction; rp; rp = rp->next)
147 	reduction_table[rp->number] = rp;
148 }
149 
150 
151 void
152 set_maxrhs(void)
153 {
154   short *itemp;
155   short *item_end;
156   int length;
157   int max;
158 
159   length = 0;
160   max = 0;
161   item_end = ritem + nitems;
162   for (itemp = ritem; itemp < item_end; itemp++)
163     {
164       if (*itemp >= 0)
165 	{
166 	  length++;
167 	}
168       else
169 	{
170 	  if (length > max) max = length;
171 	  length = 0;
172 	}
173     }
174 
175   maxrhs = max;
176 }
177 
178 
179 void
180 initialize_LA(void)
181 {
182   int i, j, k;
183   reductions *rp;
184 
185   lookaheads = NEW2(nstates + 1, short);
186 
187   k = 0;
188   for (i = 0; i < nstates; i++)
189     {
190       lookaheads[i] = k;
191       rp = reduction_table[i];
192       if (rp)
193 	k += rp->nreds;
194     }
195   lookaheads[nstates] = k;
196 
197   LA = NEW2(k * tokensetsize, unsigned);
198   LAruleno = NEW2(k, short);
199   lookback = NEW2(k, shorts *);
200 
201   k = 0;
202   for (i = 0; i < nstates; i++)
203     {
204       rp = reduction_table[i];
205       if (rp)
206 	{
207 	  for (j = 0; j < rp->nreds; j++)
208 	    {
209 	      LAruleno[k] = rp->rules[j];
210 	      k++;
211 	    }
212 	}
213     }
214 }
215 
216 void
217 set_goto_map(void)
218 {
219   shifts *sp;
220   int i;
221   int symbol;
222   int k;
223   short *temp_map;
224   int state2;
225   int state1;
226 
227   goto_map = NEW2(nvars + 1, short) - ntokens;
228   temp_map = NEW2(nvars + 1, short) - ntokens;
229 
230   ngotos = 0;
231   for (sp = first_shift; sp; sp = sp->next)
232     {
233       for (i = sp->nshifts - 1; i >= 0; i--)
234 	{
235 	  symbol = accessing_symbol[sp->shift[i]];
236 
237 	  if (ISTOKEN(symbol)) break;
238 
239 	  if (ngotos == MAXSHORT)
240 	    fatal("too many gotos");
241 
242 	  ngotos++;
243 	  goto_map[symbol]++;
244         }
245     }
246 
247   k = 0;
248   for (i = ntokens; i < nsyms; i++)
249     {
250       temp_map[i] = k;
251       k += goto_map[i];
252     }
253 
254   for (i = ntokens; i < nsyms; i++)
255     goto_map[i] = temp_map[i];
256 
257   goto_map[nsyms] = ngotos;
258   temp_map[nsyms] = ngotos;
259 
260   from_state = NEW2(ngotos, short);
261   to_state = NEW2(ngotos, short);
262 
263   for (sp = first_shift; sp; sp = sp->next)
264     {
265       state1 = sp->number;
266       for (i = sp->nshifts - 1; i >= 0; i--)
267 	{
268 	  state2 = sp->shift[i];
269 	  symbol = accessing_symbol[state2];
270 
271 	  if (ISTOKEN(symbol)) break;
272 
273 	  k = temp_map[symbol]++;
274 	  from_state[k] = state1;
275 	  to_state[k] = state2;
276 	}
277     }
278 
279   FREE(temp_map + ntokens);
280 }
281 
282 
283 
284 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
285 
286 int
287 map_goto(int state, int symbol)
288 {
289     int high;
290     int low;
291     int middle;
292     int s;
293 
294     low = goto_map[symbol];
295     high = goto_map[symbol + 1];
296 
297     for (;;)
298     {
299 	assert(low <= high);
300 	middle = (low + high) >> 1;
301 	s = from_state[middle];
302 	if (s == state)
303 	    return (middle);
304 	else if (s < state)
305 	    low = middle + 1;
306 	else
307 	    high = middle - 1;
308     }
309 }
310 
311 
312 void
313 initialize_F(void)
314 {
315   int i;
316   int j;
317   int k;
318   shifts *sp;
319   short *edge;
320   unsigned *rowp;
321   short *rp;
322   short **reads;
323   int nedges;
324   int stateno;
325   int symbol;
326   int nwords;
327 
328   nwords = ngotos * tokensetsize;
329   F = NEW2(nwords, unsigned);
330 
331   reads = NEW2(ngotos, short *);
332   edge = NEW2(ngotos + 1, short);
333   nedges = 0;
334 
335   rowp = F;
336   for (i = 0; i < ngotos; i++)
337     {
338       stateno = to_state[i];
339       sp = shift_table[stateno];
340 
341       if (sp)
342 	{
343 	  k = sp->nshifts;
344 
345 	  for (j = 0; j < k; j++)
346 	    {
347 	      symbol = accessing_symbol[sp->shift[j]];
348 	      if (ISVAR(symbol))
349 		break;
350 	      SETBIT(rowp, symbol);
351 	    }
352 
353 	  for (; j < k; j++)
354 	    {
355 	      symbol = accessing_symbol[sp->shift[j]];
356 	      if (nullable[symbol])
357 		edge[nedges++] = map_goto(stateno, symbol);
358 	    }
359 
360 	  if (nedges)
361 	    {
362 	      reads[i] = rp = NEW2(nedges + 1, short);
363 
364 	      for (j = 0; j < nedges; j++)
365 		rp[j] = edge[j];
366 
367 	      rp[nedges] = -1;
368 	      nedges = 0;
369 	    }
370 	}
371 
372       rowp += tokensetsize;
373     }
374 
375   SETBIT(F, 0);
376   digraph(reads);
377 
378   for (i = 0; i < ngotos; i++)
379     {
380       if (reads[i])
381 	FREE(reads[i]);
382     }
383 
384   FREE(reads);
385   FREE(edge);
386 }
387 
388 
389 void
390 build_relations(void)
391 {
392   int i;
393   int j;
394   int k;
395   short *rulep;
396   short *rp;
397   shifts *sp;
398   int length;
399   int nedges;
400   int done;
401   int state1;
402   int stateno;
403   int symbol1;
404   int symbol2;
405   short *shortp;
406   short *edge;
407   short *states;
408   short **new_includes;
409 
410   includes = NEW2(ngotos, short *);
411   edge = NEW2(ngotos + 1, short);
412   states = NEW2(maxrhs + 1, short);
413 
414   for (i = 0; i < ngotos; i++)
415     {
416       nedges = 0;
417       state1 = from_state[i];
418       symbol1 = accessing_symbol[to_state[i]];
419 
420       for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
421 	{
422 	  length = 1;
423 	  states[0] = state1;
424 	  stateno = state1;
425 
426 	  for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
427 	    {
428 	      symbol2 = *rp;
429 	      sp = shift_table[stateno];
430 	      k = sp->nshifts;
431 
432 	      for (j = 0; j < k; j++)
433 		{
434 		  stateno = sp->shift[j];
435 		  if (accessing_symbol[stateno] == symbol2) break;
436 		}
437 
438 	      states[length++] = stateno;
439 	    }
440 
441 	  add_lookback_edge(stateno, *rulep, i);
442 
443 	  length--;
444 	  done = 0;
445 	  while (!done)
446 	    {
447 	      done = 1;
448 	      rp--;
449 	      if (ISVAR(*rp))
450 		{
451 		  stateno = states[--length];
452 		  edge[nedges++] = map_goto(stateno, *rp);
453 		  if (nullable[*rp] && length > 0) done = 0;
454 		}
455 	    }
456 	}
457 
458       if (nedges)
459 	{
460 	  includes[i] = shortp = NEW2(nedges + 1, short);
461 	  for (j = 0; j < nedges; j++)
462 	    shortp[j] = edge[j];
463 	  shortp[nedges] = -1;
464 	}
465     }
466 
467   new_includes = transpose(includes, ngotos);
468 
469   for (i = 0; i < ngotos; i++)
470     if (includes[i])
471       FREE(includes[i]);
472 
473   FREE(includes);
474 
475   includes = new_includes;
476 
477   FREE(edge);
478   FREE(states);
479 }
480 
481 void
482 add_lookback_edge(int stateno, int ruleno, int gotono)
483 {
484     int i, k;
485     int found;
486     shorts *sp;
487 
488     i = lookaheads[stateno];
489     k = lookaheads[stateno + 1];
490     found = 0;
491     while (!found && i < k)
492     {
493 	if (LAruleno[i] == ruleno)
494 	    found = 1;
495 	else
496 	    ++i;
497     }
498     assert(found);
499 
500     sp = NEW(shorts);
501     sp->next = lookback[i];
502     sp->value = gotono;
503     lookback[i] = sp;
504 }
505 
506 
507 
508 short **
509 transpose(short **R, int n)
510 {
511   short **new_R;
512   short **temp_R;
513   short *nedges;
514   short *sp;
515   int i;
516   int k;
517 
518   nedges = NEW2(n, short);
519 
520   for (i = 0; i < n; i++)
521     {
522       sp = R[i];
523       if (sp)
524 	{
525 	  while (*sp >= 0)
526 	    nedges[*sp++]++;
527 	}
528     }
529 
530   new_R = NEW2(n, short *);
531   temp_R = NEW2(n, short *);
532 
533   for (i = 0; i < n; i++)
534     {
535       k = nedges[i];
536       if (k > 0)
537 	{
538 	  sp = NEW2(k + 1, short);
539 	  new_R[i] = sp;
540 	  temp_R[i] = sp;
541 	  sp[k] = -1;
542 	}
543     }
544 
545   FREE(nedges);
546 
547   for (i = 0; i < n; i++)
548     {
549       sp = R[i];
550       if (sp)
551 	{
552 	  while (*sp >= 0)
553 	    *temp_R[*sp++]++ = i;
554 	}
555     }
556 
557   FREE(temp_R);
558 
559   return (new_R);
560 }
561 
562 
563 void
564 compute_FOLLOWS(void)
565 {
566   digraph(includes);
567 }
568 
569 void
570 compute_lookaheads(void)
571 {
572   int i, n;
573   unsigned *fp1, *fp2, *fp3;
574   shorts *sp, *next;
575   unsigned *rowp;
576 
577   rowp = LA;
578   n = lookaheads[nstates];
579   for (i = 0; i < n; i++)
580     {
581       fp3 = rowp + tokensetsize;
582       for (sp = lookback[i]; sp; sp = sp->next)
583 	{
584 	  fp1 = rowp;
585 	  fp2 = F + tokensetsize * sp->value;
586 	  while (fp1 < fp3)
587 	    *fp1++ |= *fp2++;
588 	}
589       rowp = fp3;
590     }
591 
592   for (i = 0; i < n; i++)
593     for (sp = lookback[i]; sp; sp = next)
594       {
595         next = sp->next;
596         FREE(sp);
597       }
598 
599   FREE(lookback);
600   FREE(F);
601 }
602 
603 void
604 digraph(short **relation)
605 {
606   int i;
607 
608   infinity = ngotos + 2;
609   INDEX = NEW2(ngotos + 1, short);
610   VERTICES = NEW2(ngotos + 1, short);
611   top = 0;
612   R = relation;
613 
614   memset(INDEX, 0, ngotos * sizeof(short));
615   for (i = 0; i < ngotos; i++)
616       if (R[i])
617           traverse(i);
618 
619   FREE(INDEX);
620   FREE(VERTICES);
621 }
622 
623 
624 void
625 traverse(int i)
626 {
627   unsigned *fp1;
628   unsigned *fp2;
629   unsigned *fp3;
630   int j;
631   short *rp;
632 
633   int height;
634   unsigned *base;
635 
636   VERTICES[++top] = i;
637   INDEX[i] = height = top;
638 
639   base = F + i * tokensetsize;
640   fp3 = base + tokensetsize;
641 
642   rp = R[i];
643   if (rp)
644     {
645       while ((j = *rp++) >= 0)
646 	{
647 	  if (INDEX[j] == 0)
648 	    traverse(j);
649 
650 	  if (INDEX[i] > INDEX[j])
651 	    INDEX[i] = INDEX[j];
652 
653 	  fp1 = base;
654 	  fp2 = F + j * tokensetsize;
655 
656 	  while (fp1 < fp3)
657 	    *fp1++ |= *fp2++;
658 	}
659     }
660 
661   if (INDEX[i] == height)
662     {
663       for (;;)
664 	{
665 	  j = VERTICES[top--];
666 	  INDEX[j] = infinity;
667 
668 	  if (i == j)
669 	    break;
670 
671 	  fp1 = base;
672 	  fp2 = F + j * tokensetsize;
673 
674 	  while (fp1 < fp3)
675 	    *fp2++ = *fp1++;
676 	}
677     }
678 }
679