xref: /original-bsd/usr.bin/yacc/lalr.c (revision 4b9b56dc)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Robert Paul Corbett.
7  *
8  * Redistribution and use in source and binary forms are permitted
9  * provided that the above copyright notice and this paragraph are
10  * duplicated in all such forms and that any documentation,
11  * advertising materials, and other materials related to such
12  * distribution and use acknowledge that the software was developed
13  * by the University of California, Berkeley.  The name of the
14  * University may not be used to endorse or promote products derived
15  * from this software without specific prior written permission.
16  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19  */
20 
21 #ifndef lint
22 static char sccsid[] = "@(#)lalr.c	5.2 (Berkeley) 02/15/90";
23 #endif /* not lint */
24 
25 #include "defs.h"
26 
27 typedef
28   struct shorts
29     {
30       struct shorts *next;
31       short value;
32     }
33   shorts;
34 
35 int tokensetsize;
36 short *lookaheads;
37 short *LAruleno;
38 unsigned *LA;
39 short *accessing_symbol;
40 core **state_table;
41 shifts **shift_table;
42 reductions **reduction_table;
43 short *goto_map;
44 short *from_state;
45 short *to_state;
46 
47 short **transpose();
48 
49 static int infinity;
50 static int maxrhs;
51 static int ngotos;
52 static unsigned *F;
53 static short **includes;
54 static shorts **lookback;
55 static short **R;
56 static short *INDEX;
57 static short *VERTICES;
58 static int top;
59 
60 
61 lalr()
62 {
63     tokensetsize = WORDSIZE(ntokens);
64 
65     set_state_table();
66     set_accessing_symbol();
67     set_shift_table();
68     set_reduction_table();
69     set_maxrhs();
70     initialize_LA();
71     set_goto_map();
72     initialize_F();
73     build_relations();
74     compute_FOLLOWS();
75     compute_lookaheads();
76 }
77 
78 
79 
80 set_state_table()
81 {
82     register core *sp;
83 
84     state_table = NEW2(nstates, core *);
85     for (sp = first_state; sp; sp = sp->next)
86 	state_table[sp->number] = sp;
87 }
88 
89 
90 
91 set_accessing_symbol()
92 {
93     register core *sp;
94 
95     accessing_symbol = NEW2(nstates, short);
96     for (sp = first_state; sp; sp = sp->next)
97 	accessing_symbol[sp->number] = sp->accessing_symbol;
98 }
99 
100 
101 
102 set_shift_table()
103 {
104     register shifts *sp;
105 
106     shift_table = NEW2(nstates, shifts *);
107     for (sp = first_shift; sp; sp = sp->next)
108 	shift_table[sp->number] = sp;
109 }
110 
111 
112 
113 set_reduction_table()
114 {
115     register reductions *rp;
116 
117     reduction_table = NEW2(nstates, reductions *);
118     for (rp = first_reduction; rp; rp = rp->next)
119 	reduction_table[rp->number] = rp;
120 }
121 
122 
123 
124 set_maxrhs()
125 {
126   register short *itemp;
127   register short *item_end;
128   register int length;
129   register int max;
130 
131   length = 0;
132   max = 0;
133   item_end = ritem + nitems;
134   for (itemp = ritem; itemp < item_end; itemp++)
135     {
136       if (*itemp >= 0)
137 	{
138 	  length++;
139 	}
140       else
141 	{
142 	  if (length > max) max = length;
143 	  length = 0;
144 	}
145     }
146 
147   maxrhs = max;
148 }
149 
150 
151 
152 initialize_LA()
153 {
154   register int i, j, k;
155   register reductions *rp;
156 
157   lookaheads = NEW2(nstates + 1, short);
158 
159   k = 0;
160   for (i = 0; i < nstates; i++)
161     {
162       lookaheads[i] = k;
163       rp = reduction_table[i];
164       if (rp)
165 	k += rp->nreds;
166     }
167   lookaheads[nstates] = k;
168 
169   LA = NEW2(k * tokensetsize, unsigned);
170   LAruleno = NEW2(k, short);
171   lookback = NEW2(k, shorts *);
172 
173   k = 0;
174   for (i = 0; i < nstates; i++)
175     {
176       rp = reduction_table[i];
177       if (rp)
178 	{
179 	  for (j = 0; j < rp->nreds; j++)
180 	    {
181 	      LAruleno[k] = rp->rules[j];
182 	      k++;
183 	    }
184 	}
185     }
186 }
187 
188 
189 set_goto_map()
190 {
191   register shifts *sp;
192   register int i;
193   register int symbol;
194   register int k;
195   register short *temp_map;
196   register int state2;
197   register int state1;
198 
199   goto_map = NEW2(nvars + 1, short) - ntokens;
200   temp_map = NEW2(nvars + 1, short) - ntokens;
201 
202   ngotos = 0;
203   for (sp = first_shift; sp; sp = sp->next)
204     {
205       for (i = sp->nshifts - 1; i >= 0; i--)
206 	{
207 	  symbol = accessing_symbol[sp->shift[i]];
208 
209 	  if (ISTOKEN(symbol)) break;
210 
211 	  if (ngotos == MAXSHORT)
212 	    fatal("too many gotos");
213 
214 	  ngotos++;
215 	  goto_map[symbol]++;
216         }
217     }
218 
219   k = 0;
220   for (i = ntokens; i < nsyms; i++)
221     {
222       temp_map[i] = k;
223       k += goto_map[i];
224     }
225 
226   for (i = ntokens; i < nsyms; i++)
227     goto_map[i] = temp_map[i];
228 
229   goto_map[nsyms] = ngotos;
230   temp_map[nsyms] = ngotos;
231 
232   from_state = NEW2(ngotos, short);
233   to_state = NEW2(ngotos, short);
234 
235   for (sp = first_shift; sp; sp = sp->next)
236     {
237       state1 = sp->number;
238       for (i = sp->nshifts - 1; i >= 0; i--)
239 	{
240 	  state2 = sp->shift[i];
241 	  symbol = accessing_symbol[state2];
242 
243 	  if (ISTOKEN(symbol)) break;
244 
245 	  k = temp_map[symbol]++;
246 	  from_state[k] = state1;
247 	  to_state[k] = state2;
248 	}
249     }
250 
251   FREE(temp_map + ntokens);
252 }
253 
254 
255 
256 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
257 
258 int
259 map_goto(state, symbol)
260 int state;
261 int symbol;
262 {
263     register int high;
264     register int low;
265     register int middle;
266     register int s;
267 
268     low = goto_map[symbol];
269     high = goto_map[symbol + 1];
270 
271     for (;;)
272     {
273 	assert(low <= high);
274 	middle = (low + high) >> 1;
275 	s = from_state[middle];
276 	if (s == state)
277 	    return (middle);
278 	else if (s < state)
279 	    low = middle + 1;
280 	else
281 	    high = middle - 1;
282     }
283 }
284 
285 
286 
287 initialize_F()
288 {
289   register int i;
290   register int j;
291   register int k;
292   register shifts *sp;
293   register short *edge;
294   register unsigned *rowp;
295   register short *rp;
296   register short **reads;
297   register int nedges;
298   register int stateno;
299   register int symbol;
300   register int nwords;
301 
302   nwords = ngotos * tokensetsize;
303   F = NEW2(nwords, unsigned);
304 
305   reads = NEW2(ngotos, short *);
306   edge = NEW2(ngotos + 1, short);
307   nedges = 0;
308 
309   rowp = F;
310   for (i = 0; i < ngotos; i++)
311     {
312       stateno = to_state[i];
313       sp = shift_table[stateno];
314 
315       if (sp)
316 	{
317 	  k = sp->nshifts;
318 
319 	  for (j = 0; j < k; j++)
320 	    {
321 	      symbol = accessing_symbol[sp->shift[j]];
322 	      if (ISVAR(symbol))
323 		break;
324 	      SETBIT(rowp, symbol);
325 	    }
326 
327 	  for (; j < k; j++)
328 	    {
329 	      symbol = accessing_symbol[sp->shift[j]];
330 	      if (nullable[symbol])
331 		edge[nedges++] = map_goto(stateno, symbol);
332 	    }
333 
334 	  if (nedges)
335 	    {
336 	      reads[i] = rp = NEW2(nedges + 1, short);
337 
338 	      for (j = 0; j < nedges; j++)
339 		rp[j] = edge[j];
340 
341 	      rp[nedges] = -1;
342 	      nedges = 0;
343 	    }
344 	}
345 
346       rowp += tokensetsize;
347     }
348 
349   SETBIT(F, 0);
350   digraph(reads);
351 
352   for (i = 0; i < ngotos; i++)
353     {
354       if (reads[i])
355 	FREE(reads[i]);
356     }
357 
358   FREE(reads);
359   FREE(edge);
360 }
361 
362 
363 
364 build_relations()
365 {
366   register int i;
367   register int j;
368   register int k;
369   register short *rulep;
370   register short *rp;
371   register shifts *sp;
372   register int length;
373   register int nedges;
374   register int done;
375   register int state1;
376   register int stateno;
377   register int symbol1;
378   register int symbol2;
379   register short *shortp;
380   register short *edge;
381   register short *states;
382   register short **new_includes;
383 
384   includes = NEW2(ngotos, short *);
385   edge = NEW2(ngotos + 1, short);
386   states = NEW2(maxrhs + 1, short);
387 
388   for (i = 0; i < ngotos; i++)
389     {
390       nedges = 0;
391       state1 = from_state[i];
392       symbol1 = accessing_symbol[to_state[i]];
393 
394       for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
395 	{
396 	  length = 1;
397 	  states[0] = state1;
398 	  stateno = state1;
399 
400 	  for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
401 	    {
402 	      symbol2 = *rp;
403 	      sp = shift_table[stateno];
404 	      k = sp->nshifts;
405 
406 	      for (j = 0; j < k; j++)
407 		{
408 		  stateno = sp->shift[j];
409 		  if (accessing_symbol[stateno] == symbol2) break;
410 		}
411 
412 	      states[length++] = stateno;
413 	    }
414 
415 	  add_lookback_edge(stateno, *rulep, i);
416 
417 	  length--;
418 	  done = 0;
419 	  while (!done)
420 	    {
421 	      done = 1;
422 	      rp--;
423 	      if (ISVAR(*rp))
424 		{
425 		  stateno = states[--length];
426 		  edge[nedges++] = map_goto(stateno, *rp);
427 		  if (nullable[*rp] && length > 0) done = 0;
428 		}
429 	    }
430 	}
431 
432       if (nedges)
433 	{
434 	  includes[i] = shortp = NEW2(nedges + 1, short);
435 	  for (j = 0; j < nedges; j++)
436 	    shortp[j] = edge[j];
437 	  shortp[nedges] = -1;
438 	}
439     }
440 
441   new_includes = transpose(includes, ngotos);
442 
443   for (i = 0; i < ngotos; i++)
444     if (includes[i])
445       FREE(includes[i]);
446 
447   FREE(includes);
448 
449   includes = new_includes;
450 
451   FREE(edge);
452   FREE(states);
453 }
454 
455 
456 add_lookback_edge(stateno, ruleno, gotono)
457 int stateno, ruleno, gotono;
458 {
459     register int i, k;
460     register int found;
461     register shorts *sp;
462 
463     i = lookaheads[stateno];
464     k = lookaheads[stateno + 1];
465     found = 0;
466     while (!found && i < k)
467     {
468 	if (LAruleno[i] == ruleno)
469 	    found = 1;
470 	else
471 	    ++i;
472     }
473     assert(found);
474 
475     sp = NEW(shorts);
476     sp->next = lookback[i];
477     sp->value = gotono;
478     lookback[i] = sp;
479 }
480 
481 
482 
483 short **
484 transpose(R, n)
485 short **R;
486 int n;
487 {
488   register short **new_R;
489   register short **temp_R;
490   register short *nedges;
491   register short *sp;
492   register int i;
493   register int k;
494 
495   nedges = NEW2(n, short);
496 
497   for (i = 0; i < n; i++)
498     {
499       sp = R[i];
500       if (sp)
501 	{
502 	  while (*sp >= 0)
503 	    nedges[*sp++]++;
504 	}
505     }
506 
507   new_R = NEW2(n, short *);
508   temp_R = NEW2(n, short *);
509 
510   for (i = 0; i < n; i++)
511     {
512       k = nedges[i];
513       if (k > 0)
514 	{
515 	  sp = NEW2(k + 1, short);
516 	  new_R[i] = sp;
517 	  temp_R[i] = sp;
518 	  sp[k] = -1;
519 	}
520     }
521 
522   FREE(nedges);
523 
524   for (i = 0; i < n; i++)
525     {
526       sp = R[i];
527       if (sp)
528 	{
529 	  while (*sp >= 0)
530 	    *temp_R[*sp++]++ = i;
531 	}
532     }
533 
534   FREE(temp_R);
535 
536   return (new_R);
537 }
538 
539 
540 
541 compute_FOLLOWS()
542 {
543   digraph(includes);
544 }
545 
546 
547 compute_lookaheads()
548 {
549   register int i, n;
550   register unsigned *fp1, *fp2, *fp3;
551   register shorts *sp, *next;
552   register unsigned *rowp;
553 
554   rowp = LA;
555   n = lookaheads[nstates];
556   for (i = 0; i < n; i++)
557     {
558       fp3 = rowp + tokensetsize;
559       for (sp = lookback[i]; sp; sp = sp->next)
560 	{
561 	  fp1 = rowp;
562 	  fp2 = F + tokensetsize * sp->value;
563 	  while (fp1 < fp3)
564 	    *fp1++ |= *fp2++;
565 	}
566       rowp = fp3;
567     }
568 
569   for (i = 0; i < n; i++)
570     for (sp = lookback[i]; sp; sp = next)
571       {
572         next = sp->next;
573         FREE(sp);
574       }
575 
576   FREE(lookback);
577   FREE(F);
578 }
579 
580 
581 digraph(relation)
582 short **relation;
583 {
584   register int i;
585 
586   infinity = ngotos + 2;
587   INDEX = NEW2(ngotos + 1, short);
588   VERTICES = NEW2(ngotos + 1, short);
589   top = 0;
590 
591   R = relation;
592 
593   for (i = 0; i < ngotos; i++)
594     INDEX[i] = 0;
595 
596   for (i = 0; i < ngotos; i++)
597     {
598       if (INDEX[i] == 0 && R[i])
599 	traverse(i);
600     }
601 
602   FREE(INDEX);
603   FREE(VERTICES);
604 }
605 
606 
607 
608 traverse(i)
609 register int i;
610 {
611   register unsigned *fp1;
612   register unsigned *fp2;
613   register unsigned *fp3;
614   register int j;
615   register short *rp;
616 
617   int height;
618   unsigned *base;
619 
620   VERTICES[++top] = i;
621   INDEX[i] = height = top;
622 
623   base = F + i * tokensetsize;
624   fp3 = base + tokensetsize;
625 
626   rp = R[i];
627   if (rp)
628     {
629       while ((j = *rp++) >= 0)
630 	{
631 	  if (INDEX[j] == 0)
632 	    traverse(j);
633 
634 	  if (INDEX[i] > INDEX[j])
635 	    INDEX[i] = INDEX[j];
636 
637 	  fp1 = base;
638 	  fp2 = F + j * tokensetsize;
639 
640 	  while (fp1 < fp3)
641 	    *fp1++ |= *fp2++;
642 	}
643     }
644 
645   if (INDEX[i] == height)
646     {
647       for (;;)
648 	{
649 	  j = VERTICES[top--];
650 	  INDEX[j] = infinity;
651 
652 	  if (i == j)
653 	    break;
654 
655 	  fp1 = base;
656 	  fp2 = F + j * tokensetsize;
657 
658 	  while (fp1 < fp3)
659 	    *fp2++ = *fp1++;
660 	}
661     }
662 }
663