xref: /openbsd/usr.bin/yacc/lalr.c (revision 8529ddd3)
1 /*	$OpenBSD: lalr.c,v 1.17 2014/05/17 15:19:17 chl Exp $	*/
2 /*	$NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Robert Paul Corbett.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include "defs.h"
37 
38 typedef struct shorts {
39 	struct shorts *next;
40 	short value;
41 } shorts;
42 
43 int tokensetsize;
44 short *lookaheads;
45 short *LAruleno;
46 unsigned *LA;
47 short *accessing_symbol;
48 core **state_table;
49 shifts **shift_table;
50 reductions **reduction_table;
51 short *goto_map;
52 short *from_state;
53 short *to_state;
54 
55 short **transpose();
56 void set_state_table(void);
57 void set_accessing_symbol(void);
58 void set_shift_table(void);
59 void set_reduction_table(void);
60 void set_maxrhs(void);
61 void initialize_LA(void);
62 void set_goto_map(void);
63 void initialize_F(void);
64 void build_relations(void);
65 void compute_FOLLOWS(void);
66 void compute_lookaheads(void);
67 int map_goto(int, int);
68 void digraph(short **);
69 void add_lookback_edge(int, int, int);
70 void traverse(int);
71 
72 static int infinity;
73 static int maxrhs;
74 static int ngotos;
75 static unsigned *F;
76 static short **includes;
77 static shorts **lookback;
78 static short **R;
79 static short *INDEX;
80 static short *VERTICES;
81 static int top;
82 
83 void
84 lalr(void)
85 {
86 	tokensetsize = WORDSIZE(ntokens);
87 
88 	set_state_table();
89 	set_accessing_symbol();
90 	set_shift_table();
91 	set_reduction_table();
92 	set_maxrhs();
93 	initialize_LA();
94 	set_goto_map();
95 	initialize_F();
96 	build_relations();
97 	compute_FOLLOWS();
98 	compute_lookaheads();
99 	free_derives();
100 	free_nullable();
101 }
102 
103 
104 void
105 set_state_table(void)
106 {
107 	core *sp;
108 
109 	state_table = NEW2(nstates, core *);
110 	for (sp = first_state; sp; sp = sp->next)
111 		state_table[sp->number] = sp;
112 }
113 
114 
115 void
116 set_accessing_symbol(void)
117 {
118 	core *sp;
119 
120 	accessing_symbol = NEW2(nstates, short);
121 	for (sp = first_state; sp; sp = sp->next)
122 		accessing_symbol[sp->number] = sp->accessing_symbol;
123 }
124 
125 
126 void
127 set_shift_table(void)
128 {
129 	shifts *sp;
130 
131 	shift_table = NEW2(nstates, shifts *);
132 	for (sp = first_shift; sp; sp = sp->next)
133 		shift_table[sp->number] = sp;
134 }
135 
136 
137 void
138 set_reduction_table(void)
139 {
140 	reductions *rp;
141 
142 	reduction_table = NEW2(nstates, reductions *);
143 	for (rp = first_reduction; rp; rp = rp->next)
144 		reduction_table[rp->number] = rp;
145 }
146 
147 
148 void
149 set_maxrhs(void)
150 {
151 	short *itemp, *item_end;
152 	int length, max;
153 
154 	length = 0;
155 	max = 0;
156 	item_end = ritem + nitems;
157 	for (itemp = ritem; itemp < item_end; itemp++) {
158 		if (*itemp >= 0) {
159 			length++;
160 		} else {
161 			if (length > max) max = length;
162 			length = 0;
163 		}
164 	}
165 
166 	maxrhs = max;
167 }
168 
169 
170 void
171 initialize_LA(void)
172 {
173 	int i, j, k;
174 	reductions *rp;
175 
176 	lookaheads = NEW2(nstates + 1, short);
177 
178 	k = 0;
179 	for (i = 0; i < nstates; i++) {
180 		lookaheads[i] = k;
181 		rp = reduction_table[i];
182 		if (rp)
183 			k += rp->nreds;
184 	}
185 	lookaheads[nstates] = k;
186 
187 	LA = NEW2(k * tokensetsize, unsigned);
188 	LAruleno = NEW2(k, short);
189 	lookback = NEW2(k, shorts *);
190 
191 	k = 0;
192 	for (i = 0; i < nstates; i++) {
193 		rp = reduction_table[i];
194 		if (rp) {
195 			for (j = 0; j < rp->nreds; j++) {
196 				LAruleno[k] = rp->rules[j];
197 				k++;
198 			}
199 		}
200 	}
201 }
202 
203 void
204 set_goto_map(void)
205 {
206 	shifts *sp;
207 	int i, k, symbol;
208 	int state1, state2;
209 	short *temp_map;
210 
211 	goto_map = NEW2(nvars + 1, short) - ntokens;
212 	temp_map = NEW2(nvars + 1, short) - ntokens;
213 
214 	ngotos = 0;
215 	for (sp = first_shift; sp; sp = sp->next) {
216 		for (i = sp->nshifts - 1; i >= 0; i--) {
217 			symbol = accessing_symbol[sp->shift[i]];
218 
219 			if (ISTOKEN(symbol)) break;
220 
221 			if (ngotos == MAXSHORT)
222 				fatal("too many gotos");
223 
224 			ngotos++;
225 			goto_map[symbol]++;
226 		}
227 	}
228 
229 	k = 0;
230 	for (i = ntokens; i < nsyms; i++) {
231 		temp_map[i] = k;
232 		k += goto_map[i];
233 	}
234 
235 	for (i = ntokens; i < nsyms; i++)
236 		goto_map[i] = temp_map[i];
237 
238 	goto_map[nsyms] = ngotos;
239 	temp_map[nsyms] = ngotos;
240 
241 	from_state = NEW2(ngotos, short);
242 	to_state = NEW2(ngotos, short);
243 
244 	for (sp = first_shift; sp; sp = sp->next) {
245 		state1 = sp->number;
246 		for (i = sp->nshifts - 1; i >= 0; i--) {
247 			state2 = sp->shift[i];
248 			symbol = accessing_symbol[state2];
249 
250 			if (ISTOKEN(symbol)) break;
251 
252 			k = temp_map[symbol]++;
253 			from_state[k] = state1;
254 			to_state[k] = state2;
255 		}
256 	}
257 
258 	free(temp_map + ntokens);
259 }
260 
261 
262 
263 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
264 
265 int
266 map_goto(int state, int symbol)
267 {
268 	int high, low, middle, s;
269 
270 	low = goto_map[symbol];
271 	high = goto_map[symbol + 1];
272 
273 	for (;;) {
274 		assert(low <= high);
275 		middle = (low + high) >> 1;
276 		s = from_state[middle];
277 		if (s == state)
278 			return (middle);
279 		else if (s < state)
280 			low = middle + 1;
281 		else
282 			high = middle - 1;
283 	}
284 }
285 
286 
287 void
288 initialize_F(void)
289 {
290 	int i, j, k;
291 	shifts *sp;
292 	short *edge, *rp, **reads;
293 	unsigned int *rowp;
294 	int nedges, stateno, symbol, nwords;
295 
296 	nwords = ngotos * tokensetsize;
297 	F = NEW2(nwords, unsigned);
298 
299 	reads = NEW2(ngotos, short *);
300 	edge = NEW2(ngotos + 1, short);
301 	nedges = 0;
302 
303 	rowp = F;
304 	for (i = 0; i < ngotos; i++) {
305 		stateno = to_state[i];
306 		sp = shift_table[stateno];
307 
308 		if (sp) {
309 			k = sp->nshifts;
310 
311 			for (j = 0; j < k; j++) {
312 				symbol = accessing_symbol[sp->shift[j]];
313 				if (ISVAR(symbol))
314 					break;
315 				SETBIT(rowp, symbol);
316 			}
317 
318 			for (; j < k; j++) {
319 				symbol = accessing_symbol[sp->shift[j]];
320 				if (nullable[symbol])
321 					edge[nedges++] = map_goto(stateno, symbol);
322 			}
323 
324 			if (nedges) {
325 				reads[i] = rp = NEW2(nedges + 1, short);
326 
327 				for (j = 0; j < nedges; j++)
328 					rp[j] = edge[j];
329 
330 				rp[nedges] = -1;
331 				nedges = 0;
332 			}
333 		}
334 
335 		rowp += tokensetsize;
336 	}
337 
338 	SETBIT(F, 0);
339 	digraph(reads);
340 
341 	for (i = 0; i < ngotos; i++) {
342 		if (reads[i])
343 			free(reads[i]);
344 	}
345 
346 	free(reads);
347 	free(edge);
348 }
349 
350 
351 void
352 build_relations(void)
353 {
354 	int i, j, k;
355 	short *rulep, *rp;
356 	shifts *sp;
357 	int length, nedges, done;
358 	int state1, stateno, symbol1, symbol2;
359 	short *shortp, *edge, *states;
360 	short **new_includes;
361 
362 	includes = NEW2(ngotos, short *);
363 	edge = NEW2(ngotos + 1, short);
364 	states = NEW2(maxrhs + 1, short);
365 
366 	for (i = 0; i < ngotos; i++) {
367 		nedges = 0;
368 		state1 = from_state[i];
369 		symbol1 = accessing_symbol[to_state[i]];
370 
371 		for (rulep = derives[symbol1]; *rulep >= 0; rulep++) {
372 			length = 1;
373 			states[0] = state1;
374 			stateno = state1;
375 
376 			for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) {
377 				symbol2 = *rp;
378 				sp = shift_table[stateno];
379 				k = sp->nshifts;
380 
381 				for (j = 0; j < k; j++) {
382 					stateno = sp->shift[j];
383 					if (accessing_symbol[stateno] == symbol2)
384 						break;
385 				}
386 
387 				states[length++] = stateno;
388 			}
389 
390 			add_lookback_edge(stateno, *rulep, i);
391 
392 			length--;
393 			done = 0;
394 			while (!done) {
395 				done = 1;
396 				rp--;
397 				if (ISVAR(*rp)) {
398 					stateno = states[--length];
399 					edge[nedges++] = map_goto(stateno, *rp);
400 					if (nullable[*rp] && length > 0)
401 						done = 0;
402 				}
403 			}
404 		}
405 
406 		if (nedges) {
407 			includes[i] = shortp = NEW2(nedges + 1, short);
408 			for (j = 0; j < nedges; j++)
409 				shortp[j] = edge[j];
410 			shortp[nedges] = -1;
411 		}
412 	}
413 
414 	new_includes = transpose(includes, ngotos);
415 
416 	for (i = 0; i < ngotos; i++)
417 		if (includes[i])
418 			free(includes[i]);
419 
420 	free(includes);
421 
422 	includes = new_includes;
423 
424 	free(edge);
425 	free(states);
426 }
427 
428 void
429 add_lookback_edge(int stateno, int ruleno, int gotono)
430 {
431 	int i, k, found;
432 	shorts *sp;
433 
434 	i = lookaheads[stateno];
435 	k = lookaheads[stateno + 1];
436 	found = 0;
437 	while (!found && i < k) {
438 		if (LAruleno[i] == ruleno)
439 			found = 1;
440 		else
441 			++i;
442 	}
443 	assert(found);
444 
445 	sp = NEW(shorts);
446 	sp->next = lookback[i];
447 	sp->value = gotono;
448 	lookback[i] = sp;
449 }
450 
451 
452 
453 short **
454 transpose(short **R, int n)
455 {
456 	short **new_R, **temp_R, *nedges, *sp;
457 	int i, k;
458 
459 	nedges = NEW2(n, short);
460 
461 	for (i = 0; i < n; i++) {
462 		sp = R[i];
463 		if (sp) {
464 			while (*sp >= 0)
465 				nedges[*sp++]++;
466 		}
467 	}
468 
469 	new_R = NEW2(n, short *);
470 	temp_R = NEW2(n, short *);
471 
472 	for (i = 0; i < n; i++) {
473 		k = nedges[i];
474 		if (k > 0) {
475 			sp = NEW2(k + 1, short);
476 			new_R[i] = sp;
477 			temp_R[i] = sp;
478 			sp[k] = -1;
479 		}
480 	}
481 
482 	free(nedges);
483 
484 	for (i = 0; i < n; i++) {
485 		sp = R[i];
486 		if (sp) {
487 			while (*sp >= 0)
488 				*temp_R[*sp++]++ = i;
489 		}
490 	}
491 
492 	free(temp_R);
493 
494 	return (new_R);
495 }
496 
497 
498 void
499 compute_FOLLOWS(void)
500 {
501 	digraph(includes);
502 }
503 
504 void
505 compute_lookaheads(void)
506 {
507 	int i, n;
508 	unsigned int *fp1, *fp2, *fp3;
509 	shorts *sp, *next;
510 	unsigned int *rowp;
511 
512 	rowp = LA;
513 	n = lookaheads[nstates];
514 	for (i = 0; i < n; i++) {
515 		fp3 = rowp + tokensetsize;
516 		for (sp = lookback[i]; sp; sp = sp->next) {
517 			fp1 = rowp;
518 			fp2 = F + tokensetsize * sp->value;
519 			while (fp1 < fp3)
520 				*fp1++ |= *fp2++;
521 		}
522 		rowp = fp3;
523 	}
524 
525 	for (i = 0; i < n; i++)
526 		for (sp = lookback[i]; sp; sp = next) {
527 			next = sp->next;
528 			free(sp);
529 		}
530 
531 	free(lookback);
532 	free(F);
533 }
534 
535 void
536 digraph(short **relation)
537 {
538 	int i;
539 
540 	infinity = ngotos + 2;
541 	INDEX = NEW2(ngotos + 1, short);
542 	VERTICES = NEW2(ngotos + 1, short);
543 	top = 0;
544 	R = relation;
545 
546 	memset(INDEX, 0, ngotos * sizeof(short));
547 	for (i = 0; i < ngotos; i++)
548 		if (R[i])
549 			traverse(i);
550 
551 	free(INDEX);
552 	free(VERTICES);
553 }
554 
555 
556 void
557 traverse(int i)
558 {
559 	unsigned int *base, *fp1, *fp2, *fp3;
560 	int j, height;
561 	short *rp;
562 
563 	VERTICES[++top] = i;
564 	INDEX[i] = height = top;
565 
566 	base = F + i * tokensetsize;
567 	fp3 = base + tokensetsize;
568 
569 	rp = R[i];
570 	if (rp) {
571 		while ((j = *rp++) >= 0) {
572 			if (INDEX[j] == 0)
573 				traverse(j);
574 
575 			if (INDEX[i] > INDEX[j])
576 				INDEX[i] = INDEX[j];
577 
578 			fp1 = base;
579 			fp2 = F + j * tokensetsize;
580 
581 			while (fp1 < fp3)
582 				*fp1++ |= *fp2++;
583 		}
584 	}
585 
586 	if (INDEX[i] == height) {
587 		for (;;) {
588 			j = VERTICES[top--];
589 			INDEX[j] = infinity;
590 
591 			if (i == j)
592 				break;
593 
594 			fp1 = base;
595 			fp2 = F + j * tokensetsize;
596 
597 			while (fp1 < fp3)
598 				*fp2++ = *fp1++;
599 		}
600 	}
601 }
602