xref: /original-bsd/usr.bin/yacc/lalr.c (revision b485b642)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Robert Paul Corbett.
7  *
8  * Redistribution and use in source and binary forms are permitted
9  * provided that the above copyright notice and this paragraph are
10  * duplicated in all such forms and that any documentation,
11  * advertising materials, and other materials related to such
12  * distribution and use acknowledge that the software was developed
13  * by the University of California, Berkeley.  The name of the
14  * University may not be used to endorse or promote products derived
15  * from this software without specific prior written permission.
16  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19  */
20 
21 #ifndef lint
22 static char sccsid[] = "@(#)lalr.c	5.1 (Berkeley) 12/25/89";
23 #endif /* not lint */
24 
25 #include "defs.h"
26 
27 typedef
28   struct shorts
29     {
30       struct shorts *next;
31       short value;
32     }
33   shorts;
34 
35 int tokensetsize;
36 short *lookaheads;
37 short *LAruleno;
38 unsigned *LA;
39 short *accessing_symbol;
40 core **state_table;
41 shifts **shift_table;
42 reductions **reduction_table;
43 short *goto_map;
44 short *from_state;
45 short *to_state;
46 
47 short **transpose();
48 
49 static int infinity;
50 static int maxrhs;
51 static int ngotos;
52 static unsigned *F;
53 static short **includes;
54 static shorts **lookback;
55 static short **R;
56 static short *INDEX;
57 static short *VERTICES;
58 static int top;
59 
60 
61 lalr()
62 {
63     tokensetsize = WORDSIZE(ntokens);
64 
65     set_state_table();
66     set_accessing_symbol();
67     set_shift_table();
68     set_reduction_table();
69     set_maxrhs();
70     initialize_LA();
71     set_goto_map();
72     initialize_F();
73     build_relations();
74     compute_FOLLOWS();
75     compute_lookaheads();
76 }
77 
78 
79 
80 set_state_table()
81 {
82     register core *sp;
83 
84     state_table = NEW2(nstates, core *);
85     for (sp = first_state; sp; sp = sp->next)
86 	state_table[sp->number] = sp;
87 }
88 
89 
90 
91 set_accessing_symbol()
92 {
93     register core *sp;
94 
95     accessing_symbol = NEW2(nstates, short);
96     for (sp = first_state; sp; sp = sp->next)
97 	accessing_symbol[sp->number] = sp->accessing_symbol;
98 }
99 
100 
101 
102 set_shift_table()
103 {
104     register shifts *sp;
105 
106     shift_table = NEW2(nstates, shifts *);
107     for (sp = first_shift; sp; sp = sp->next)
108 	shift_table[sp->number] = sp;
109 }
110 
111 
112 
113 set_reduction_table()
114 {
115     register reductions *rp;
116 
117     reduction_table = NEW2(nstates, reductions *);
118     for (rp = first_reduction; rp; rp = rp->next)
119 	reduction_table[rp->number] = rp;
120 }
121 
122 
123 
124 set_maxrhs()
125 {
126   register short *itemp;
127   register short *item_end;
128   register int length;
129   register int max;
130 
131   length = 0;
132   max = 0;
133   item_end = ritem + nitems;
134   for (itemp = ritem; itemp < item_end; itemp++)
135     {
136       if (*itemp >= 0)
137 	{
138 	  length++;
139 	}
140       else
141 	{
142 	  if (length > max) max = length;
143 	  length = 0;
144 	}
145     }
146 
147   maxrhs = max;
148 }
149 
150 
151 
152 initialize_LA()
153 {
154   register int i, j, k;
155   register reductions *rp;
156 
157   lookaheads = NEW2(nstates + 1, short);
158 
159   k = 0;
160   for (i = 0; i < nstates; i++)
161     {
162       lookaheads[i] = k;
163       rp = reduction_table[i];
164       if (rp)
165 	k += rp->nreds;
166     }
167   lookaheads[nstates] = k;
168 
169   LA = NEW2(k * tokensetsize, unsigned);
170   LAruleno = NEW2(k, short);
171   lookback = NEW2(k, shorts *);
172 
173   k = 0;
174   for (i = 0; i < nstates; i++)
175     {
176       rp = reduction_table[i];
177       if (rp)
178 	{
179 	  for (j = 0; j < rp->nreds; j++)
180 	    {
181 	      LAruleno[k] = rp->rules[j];
182 	      k++;
183 	    }
184 	}
185     }
186 }
187 
188 
189 set_goto_map()
190 {
191   register shifts *sp;
192   register int i;
193   register int symbol;
194   register int k;
195   register short *temp_map;
196   register int state2;
197   register int state1;
198 
199   goto_map = NEW2(nvars + 1, short) - ntokens;
200   temp_map = NEW2(nvars + 1, short) - ntokens;
201 
202   ngotos = 0;
203   for (sp = first_shift; sp; sp = sp->next)
204     {
205       for (i = sp->nshifts - 1; i >= 0; i--)
206 	{
207 	  symbol = accessing_symbol[sp->shift[i]];
208 
209 	  if (ISTOKEN(symbol)) break;
210 
211 	  if (ngotos == MAXSHORT)
212 	    fatal("too many gotos");
213 
214 	  ngotos++;
215 	  goto_map[symbol]++;
216         }
217     }
218 
219   k = 0;
220   for (i = ntokens; i < nsyms; i++)
221     {
222       temp_map[i] = k;
223       k += goto_map[i];
224     }
225 
226   for (i = ntokens; i < nsyms; i++)
227     goto_map[i] = temp_map[i];
228 
229   goto_map[nsyms] = ngotos;
230   temp_map[nsyms] = ngotos;
231 
232   from_state = NEW2(ngotos, short);
233   to_state = NEW2(ngotos, short);
234 
235   for (sp = first_shift; sp; sp = sp->next)
236     {
237       state1 = sp->number;
238       for (i = sp->nshifts - 1; i >= 0; i--)
239 	{
240 	  state2 = sp->shift[i];
241 	  symbol = accessing_symbol[state2];
242 
243 	  if (ISTOKEN(symbol)) break;
244 
245 	  k = temp_map[symbol]++;
246 	  from_state[k] = state1;
247 	  to_state[k] = state2;
248 	}
249     }
250 
251   FREE(temp_map + ntokens);
252 }
253 
254 
255 
256 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
257 
258 int
259 map_goto(state, symbol)
260 int state;
261 int symbol;
262 {
263     register int high;
264     register int low;
265     register int middle;
266     register int s;
267 
268     low = goto_map[symbol];
269     high = goto_map[symbol + 1];
270 
271     for (;;)
272     {
273 	assert(low <= high);
274 	middle = (low + high) >> 1;
275 	s = from_state[middle];
276 	if (s == state)
277 	    return (middle);
278 	else if (s < state)
279 	    low = middle + 1;
280 	else
281 	    high = middle - 1;
282     }
283 }
284 
285 
286 
287 initialize_F()
288 {
289   register int i;
290   register int j;
291   register int k;
292   register shifts *sp;
293   register short *edge;
294   register unsigned *rowp;
295   register short *rp;
296   register short **reads;
297   register int nedges;
298   register int stateno;
299   register int symbol;
300   register int nwords;
301 
302   nwords = ngotos * tokensetsize;
303   F = NEW2(nwords, unsigned);
304 
305   reads = NEW2(ngotos, short *);
306   edge = NEW2(ngotos + 1, short);
307   nedges = 0;
308 
309   rowp = F;
310   for (i = 0; i < ngotos; i++)
311     {
312       stateno = to_state[i];
313       sp = shift_table[stateno];
314 
315       if (sp)
316 	{
317 	  k = sp->nshifts;
318 
319 	  for (j = 0; j < k; j++)
320 	    {
321 	      symbol = accessing_symbol[sp->shift[j]];
322 	      if (ISVAR(symbol))
323 		break;
324 	      SETBIT(rowp, symbol);
325 	    }
326 
327 	  for (; j < k; j++)
328 	    {
329 	      symbol = accessing_symbol[sp->shift[j]];
330 	      if (nullable[symbol])
331 		edge[nedges++] = map_goto(stateno, symbol);
332 	    }
333 
334 	  if (nedges)
335 	    {
336 	      reads[i] = rp = NEW2(nedges + 1, short);
337 
338 	      for (j = 0; j < nedges; j++)
339 		rp[j] = edge[j];
340 
341 	      rp[nedges] = -1;
342 	      nedges = 0;
343 	    }
344 	}
345 
346       rowp += tokensetsize;
347     }
348 
349   SETBIT(F, 0);
350   digraph(reads);
351 
352   for (i = 0; i < ngotos; i++)
353     {
354       if (reads[i])
355 	FREE(reads[i]);
356     }
357 
358   FREE(reads);
359   FREE(edge);
360 }
361 
362 
363 
364 build_relations()
365 {
366   register int i;
367   register int j;
368   register int k;
369   register short *rulep;
370   register short *rp;
371   register shifts *sp;
372   register int length;
373   register int nedges;
374   register int done;
375   register int state1;
376   register int stateno;
377   register int symbol1;
378   register int symbol2;
379   register short *shortp;
380   register short *edge;
381   register short *states;
382   register short **new_includes;
383 
384   includes = NEW2(ngotos, short *);
385   edge = NEW2(ngotos + 1, short);
386   states = NEW2(maxrhs + 1, short);
387 
388   for (i = 0; i < ngotos; i++)
389     {
390       nedges = 0;
391       state1 = from_state[i];
392       symbol1 = accessing_symbol[to_state[i]];
393 
394       for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
395 	{
396 	  length = 1;
397 	  states[0] = state1;
398 	  stateno = state1;
399 
400 	  for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
401 	    {
402 	      symbol2 = *rp;
403 	      sp = shift_table[stateno];
404 	      k = sp->nshifts;
405 
406 	      for (j = 0; j < k; j++)
407 		{
408 		  stateno = sp->shift[j];
409 		  if (accessing_symbol[stateno] == symbol2) break;
410 		}
411 
412 	      states[length++] = stateno;
413 	    }
414 
415 	  add_lookback_edge(stateno, *rulep, i);
416 
417 	  length--;
418 	  done = 0;
419 	  while (!done)
420 	    {
421 	      done = 1;
422 	      rp--;
423 	      if (ISVAR(*rp))
424 		{
425 		  stateno = states[--length];
426 		  edge[nedges++] = map_goto(stateno, *rp);
427 		  if (nullable[*rp] && length > 0) done = 0;
428 		}
429 	    }
430 	}
431 
432       if (nedges)
433 	{
434 	  includes[i] = shortp = NEW2(nedges + 1, short);
435 	  for (j = 0; j < nedges; j++)
436 	    shortp[j] = edge[j];
437 	  shortp[nedges] = -1;
438 	}
439     }
440 
441   new_includes = transpose(includes, ngotos);
442 
443   for (i = 0; i < ngotos; i++)
444     FREE(includes[i]);
445 
446   FREE(includes);
447 
448   includes = new_includes;
449 
450   FREE(edge);
451   FREE(states);
452 }
453 
454 
455 add_lookback_edge(stateno, ruleno, gotono)
456 int stateno, ruleno, gotono;
457 {
458     register int i, k;
459     register int found;
460     register shorts *sp;
461 
462     i = lookaheads[stateno];
463     k = lookaheads[stateno + 1];
464     found = 0;
465     while (!found && i < k)
466     {
467 	if (LAruleno[i] == ruleno)
468 	    found = 1;
469 	else
470 	    ++i;
471     }
472     assert(found);
473 
474     sp = NEW(shorts);
475     sp->next = lookback[i];
476     sp->value = gotono;
477     lookback[i] = sp;
478 }
479 
480 
481 
482 short **
483 transpose(R, n)
484 short **R;
485 int n;
486 {
487   register short **new_R;
488   register short **temp_R;
489   register short *nedges;
490   register short *sp;
491   register int i;
492   register int k;
493 
494   nedges = NEW2(n, short);
495 
496   for (i = 0; i < n; i++)
497     {
498       sp = R[i];
499       if (sp)
500 	{
501 	  while (*sp >= 0)
502 	    nedges[*sp++]++;
503 	}
504     }
505 
506   new_R = NEW2(n, short *);
507   temp_R = NEW2(n, short *);
508 
509   for (i = 0; i < n; i++)
510     {
511       k = nedges[i];
512       if (k > 0)
513 	{
514 	  sp = NEW2(k + 1, short);
515 	  new_R[i] = sp;
516 	  temp_R[i] = sp;
517 	  sp[k] = -1;
518 	}
519     }
520 
521   FREE(nedges);
522 
523   for (i = 0; i < n; i++)
524     {
525       sp = R[i];
526       if (sp)
527 	{
528 	  while (*sp >= 0)
529 	    *temp_R[*sp++]++ = i;
530 	}
531     }
532 
533   FREE(temp_R);
534 
535   return (new_R);
536 }
537 
538 
539 
540 compute_FOLLOWS()
541 {
542   digraph(includes);
543 }
544 
545 
546 compute_lookaheads()
547 {
548   register int i, n;
549   register unsigned *fp1, *fp2, *fp3;
550   register shorts *sp;
551   register unsigned *rowp;
552 
553   rowp = LA;
554   n = lookaheads[nstates];
555   for (i = 0; i < n; i++)
556     {
557       fp3 = rowp + tokensetsize;
558       for (sp = lookback[i]; sp; sp = sp->next)
559 	{
560 	  fp1 = rowp;
561 	  fp2 = F + tokensetsize * sp->value;
562 	  while (fp1 < fp3)
563 	    *fp1++ |= *fp2++;
564 	}
565       rowp = fp3;
566     }
567 
568   for (i = 0; i < n; i++)
569     for (sp = lookback[i]; sp; sp = sp->next)
570       FREE(sp);
571 
572   FREE(lookback);
573   FREE(F);
574 }
575 
576 
577 digraph(relation)
578 short **relation;
579 {
580   register int i;
581 
582   infinity = ngotos + 2;
583   INDEX = NEW2(ngotos + 1, short);
584   VERTICES = NEW2(ngotos + 1, short);
585   top = 0;
586 
587   R = relation;
588 
589   for (i = 0; i < ngotos; i++)
590     INDEX[i] = 0;
591 
592   for (i = 0; i < ngotos; i++)
593     {
594       if (INDEX[i] == 0 && R[i])
595 	traverse(i);
596     }
597 
598   FREE(INDEX);
599   FREE(VERTICES);
600 }
601 
602 
603 
604 traverse(i)
605 register int i;
606 {
607   register unsigned *fp1;
608   register unsigned *fp2;
609   register unsigned *fp3;
610   register int j;
611   register short *rp;
612 
613   int height;
614   unsigned *base;
615 
616   VERTICES[++top] = i;
617   INDEX[i] = height = top;
618 
619   base = F + i * tokensetsize;
620   fp3 = base + tokensetsize;
621 
622   rp = R[i];
623   if (rp)
624     {
625       while ((j = *rp++) >= 0)
626 	{
627 	  if (INDEX[j] == 0)
628 	    traverse(j);
629 
630 	  if (INDEX[i] > INDEX[j])
631 	    INDEX[i] = INDEX[j];
632 
633 	  fp1 = base;
634 	  fp2 = F + j * tokensetsize;
635 
636 	  while (fp1 < fp3)
637 	    *fp1++ |= *fp2++;
638 	}
639     }
640 
641   if (INDEX[i] == height)
642     {
643       for (;;)
644 	{
645 	  j = VERTICES[top--];
646 	  INDEX[j] = infinity;
647 
648 	  if (i == j)
649 	    break;
650 
651 	  fp1 = base;
652 	  fp2 = F + j * tokensetsize;
653 
654 	  while (fp1 < fp3)
655 	    *fp2++ = *fp1++;
656 	}
657     }
658 }
659