xref: /openbsd/usr.bin/yacc/lalr.c (revision 54b00945)
1 /*	$OpenBSD: lalr.c,v 1.19 2017/05/25 20:11:03 tedu Exp $	*/
2 /*	$NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Robert Paul Corbett.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include "defs.h"
37 
38 typedef struct shorts {
39 	struct shorts *next;
40 	short value;
41 } shorts;
42 
43 int tokensetsize;
44 short *lookaheads;
45 short *LAruleno;
46 unsigned *LA;
47 short *accessing_symbol;
48 core **state_table;
49 shifts **shift_table;
50 reductions **reduction_table;
51 short *goto_map;
52 short *from_state;
53 short *to_state;
54 
55 short **transpose(short **, int);
56 void set_state_table(void);
57 void set_accessing_symbol(void);
58 void set_shift_table(void);
59 void set_reduction_table(void);
60 void set_maxrhs(void);
61 void initialize_LA(void);
62 void set_goto_map(void);
63 void initialize_F(void);
64 void build_relations(void);
65 void compute_FOLLOWS(void);
66 void compute_lookaheads(void);
67 int map_goto(int, int);
68 void digraph(short **);
69 void add_lookback_edge(int, int, int);
70 void traverse(int);
71 
72 static int infinity;
73 static int maxrhs;
74 static int ngotos;
75 static unsigned *F;
76 static short **includes;
77 static shorts **lookback;
78 static short **R;
79 static short *INDEX;
80 static short *VERTICES;
81 static int top;
82 
83 void
lalr(void)84 lalr(void)
85 {
86 	tokensetsize = WORDSIZE(ntokens);
87 
88 	set_state_table();
89 	set_accessing_symbol();
90 	set_shift_table();
91 	set_reduction_table();
92 	set_maxrhs();
93 	initialize_LA();
94 	set_goto_map();
95 	initialize_F();
96 	build_relations();
97 	compute_FOLLOWS();
98 	compute_lookaheads();
99 	free_derives();
100 	free_nullable();
101 }
102 
103 
104 void
set_state_table(void)105 set_state_table(void)
106 {
107 	core *sp;
108 
109 	state_table = NEW2(nstates, core *);
110 	for (sp = first_state; sp; sp = sp->next)
111 		state_table[sp->number] = sp;
112 }
113 
114 
115 void
set_accessing_symbol(void)116 set_accessing_symbol(void)
117 {
118 	core *sp;
119 
120 	accessing_symbol = NEW2(nstates, short);
121 	for (sp = first_state; sp; sp = sp->next)
122 		accessing_symbol[sp->number] = sp->accessing_symbol;
123 }
124 
125 
126 void
set_shift_table(void)127 set_shift_table(void)
128 {
129 	shifts *sp;
130 
131 	shift_table = NEW2(nstates, shifts *);
132 	for (sp = first_shift; sp; sp = sp->next)
133 		shift_table[sp->number] = sp;
134 }
135 
136 
137 void
set_reduction_table(void)138 set_reduction_table(void)
139 {
140 	reductions *rp;
141 
142 	reduction_table = NEW2(nstates, reductions *);
143 	for (rp = first_reduction; rp; rp = rp->next)
144 		reduction_table[rp->number] = rp;
145 }
146 
147 
148 void
set_maxrhs(void)149 set_maxrhs(void)
150 {
151 	short *itemp, *item_end;
152 	int length, max;
153 
154 	length = 0;
155 	max = 0;
156 	item_end = ritem + nitems;
157 	for (itemp = ritem; itemp < item_end; itemp++) {
158 		if (*itemp >= 0) {
159 			length++;
160 		} else {
161 			if (length > max) max = length;
162 			length = 0;
163 		}
164 	}
165 
166 	maxrhs = max;
167 }
168 
169 
170 void
initialize_LA(void)171 initialize_LA(void)
172 {
173 	int i, j, k;
174 	reductions *rp;
175 
176 	lookaheads = NEW2(nstates + 1, short);
177 
178 	k = 0;
179 	for (i = 0; i < nstates; i++) {
180 		lookaheads[i] = k;
181 		rp = reduction_table[i];
182 		if (rp)
183 			k += rp->nreds;
184 	}
185 	lookaheads[nstates] = k;
186 
187 	LA = NEW2(k * tokensetsize, unsigned);
188 	LAruleno = NEW2(k, short);
189 	lookback = NEW2(k, shorts *);
190 
191 	k = 0;
192 	for (i = 0; i < nstates; i++) {
193 		rp = reduction_table[i];
194 		if (rp) {
195 			for (j = 0; j < rp->nreds; j++) {
196 				LAruleno[k] = rp->rules[j];
197 				k++;
198 			}
199 		}
200 	}
201 }
202 
203 void
set_goto_map(void)204 set_goto_map(void)
205 {
206 	shifts *sp;
207 	int i, k, symbol;
208 	int state1, state2;
209 	short *temp_map;
210 
211 	goto_map = NEW2(nvars + 1, short) - ntokens;
212 	temp_map = NEW2(nvars + 1, short) - ntokens;
213 
214 	ngotos = 0;
215 	for (sp = first_shift; sp; sp = sp->next) {
216 		for (i = sp->nshifts - 1; i >= 0; i--) {
217 			symbol = accessing_symbol[sp->shift[i]];
218 
219 			if (ISTOKEN(symbol)) break;
220 
221 			if (ngotos == MAXSHORT)
222 				fatal("too many gotos");
223 
224 			ngotos++;
225 			goto_map[symbol]++;
226 		}
227 	}
228 
229 	k = 0;
230 	for (i = ntokens; i < nsyms; i++) {
231 		temp_map[i] = k;
232 		k += goto_map[i];
233 	}
234 
235 	for (i = ntokens; i < nsyms; i++)
236 		goto_map[i] = temp_map[i];
237 
238 	goto_map[nsyms] = ngotos;
239 	temp_map[nsyms] = ngotos;
240 
241 	from_state = NEW2(ngotos, short);
242 	to_state = NEW2(ngotos, short);
243 
244 	for (sp = first_shift; sp; sp = sp->next) {
245 		state1 = sp->number;
246 		for (i = sp->nshifts - 1; i >= 0; i--) {
247 			state2 = sp->shift[i];
248 			symbol = accessing_symbol[state2];
249 
250 			if (ISTOKEN(symbol)) break;
251 
252 			k = temp_map[symbol]++;
253 			from_state[k] = state1;
254 			to_state[k] = state2;
255 		}
256 	}
257 
258 	free(temp_map + ntokens);
259 }
260 
261 
262 
263 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
264 
265 int
map_goto(int state,int symbol)266 map_goto(int state, int symbol)
267 {
268 	int high, low, middle, s;
269 
270 	low = goto_map[symbol];
271 	high = goto_map[symbol + 1];
272 
273 	for (;;) {
274 		assert(low <= high);
275 		middle = (low + high) >> 1;
276 		s = from_state[middle];
277 		if (s == state)
278 			return (middle);
279 		else if (s < state)
280 			low = middle + 1;
281 		else
282 			high = middle - 1;
283 	}
284 }
285 
286 
287 void
initialize_F(void)288 initialize_F(void)
289 {
290 	int i, j, k;
291 	shifts *sp;
292 	short *edge, *rp, **reads;
293 	unsigned int *rowp;
294 	int nedges, stateno, symbol, nwords;
295 
296 	nwords = ngotos * tokensetsize;
297 	F = NEW2(nwords, unsigned);
298 
299 	reads = NEW2(ngotos, short *);
300 	edge = NEW2(ngotos + 1, short);
301 	nedges = 0;
302 
303 	rowp = F;
304 	for (i = 0; i < ngotos; i++) {
305 		stateno = to_state[i];
306 		sp = shift_table[stateno];
307 
308 		if (sp) {
309 			k = sp->nshifts;
310 
311 			for (j = 0; j < k; j++) {
312 				symbol = accessing_symbol[sp->shift[j]];
313 				if (ISVAR(symbol))
314 					break;
315 				SETBIT(rowp, symbol);
316 			}
317 
318 			for (; j < k; j++) {
319 				symbol = accessing_symbol[sp->shift[j]];
320 				if (nullable[symbol])
321 					edge[nedges++] = map_goto(stateno, symbol);
322 			}
323 
324 			if (nedges) {
325 				reads[i] = rp = NEW2(nedges + 1, short);
326 
327 				for (j = 0; j < nedges; j++)
328 					rp[j] = edge[j];
329 
330 				rp[nedges] = -1;
331 				nedges = 0;
332 			}
333 		}
334 
335 		rowp += tokensetsize;
336 	}
337 
338 	SETBIT(F, 0);
339 	digraph(reads);
340 
341 	for (i = 0; i < ngotos; i++)
342 		free(reads[i]);
343 
344 	free(reads);
345 	free(edge);
346 }
347 
348 
349 void
build_relations(void)350 build_relations(void)
351 {
352 	int i, j, k;
353 	short *rulep, *rp;
354 	shifts *sp;
355 	int length, nedges, done;
356 	int state1, stateno, symbol1, symbol2;
357 	short *shortp, *edge, *states;
358 	short **new_includes;
359 
360 	includes = NEW2(ngotos, short *);
361 	edge = NEW2(ngotos + 1, short);
362 	states = NEW2(maxrhs + 1, short);
363 
364 	for (i = 0; i < ngotos; i++) {
365 		nedges = 0;
366 		state1 = from_state[i];
367 		symbol1 = accessing_symbol[to_state[i]];
368 
369 		for (rulep = derives[symbol1]; *rulep >= 0; rulep++) {
370 			length = 1;
371 			states[0] = state1;
372 			stateno = state1;
373 
374 			for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) {
375 				symbol2 = *rp;
376 				sp = shift_table[stateno];
377 				k = sp->nshifts;
378 
379 				for (j = 0; j < k; j++) {
380 					stateno = sp->shift[j];
381 					if (accessing_symbol[stateno] == symbol2)
382 						break;
383 				}
384 
385 				states[length++] = stateno;
386 			}
387 
388 			add_lookback_edge(stateno, *rulep, i);
389 
390 			length--;
391 			done = 0;
392 			while (!done) {
393 				done = 1;
394 				rp--;
395 				if (ISVAR(*rp)) {
396 					stateno = states[--length];
397 					edge[nedges++] = map_goto(stateno, *rp);
398 					if (nullable[*rp] && length > 0)
399 						done = 0;
400 				}
401 			}
402 		}
403 
404 		if (nedges) {
405 			includes[i] = shortp = NEW2(nedges + 1, short);
406 			for (j = 0; j < nedges; j++)
407 				shortp[j] = edge[j];
408 			shortp[nedges] = -1;
409 		}
410 	}
411 
412 	new_includes = transpose(includes, ngotos);
413 
414 	for (i = 0; i < ngotos; i++)
415 		free(includes[i]);
416 
417 	free(includes);
418 
419 	includes = new_includes;
420 
421 	free(edge);
422 	free(states);
423 }
424 
425 void
add_lookback_edge(int stateno,int ruleno,int gotono)426 add_lookback_edge(int stateno, int ruleno, int gotono)
427 {
428 	int i, k, found;
429 	shorts *sp;
430 
431 	i = lookaheads[stateno];
432 	k = lookaheads[stateno + 1];
433 	found = 0;
434 	while (!found && i < k) {
435 		if (LAruleno[i] == ruleno)
436 			found = 1;
437 		else
438 			++i;
439 	}
440 	assert(found);
441 
442 	sp = NEW(shorts);
443 	sp->next = lookback[i];
444 	sp->value = gotono;
445 	lookback[i] = sp;
446 }
447 
448 
449 
450 short **
transpose(short ** old_R,int n)451 transpose(short **old_R, int n)
452 {
453 	short **new_R, **temp_R, *nedges, *sp;
454 	int i, k;
455 
456 	nedges = NEW2(n, short);
457 
458 	for (i = 0; i < n; i++) {
459 		sp = old_R[i];
460 		if (sp) {
461 			while (*sp >= 0)
462 				nedges[*sp++]++;
463 		}
464 	}
465 
466 	new_R = NEW2(n, short *);
467 	temp_R = NEW2(n, short *);
468 
469 	for (i = 0; i < n; i++) {
470 		k = nedges[i];
471 		if (k > 0) {
472 			sp = NEW2(k + 1, short);
473 			new_R[i] = sp;
474 			temp_R[i] = sp;
475 			sp[k] = -1;
476 		}
477 	}
478 
479 	free(nedges);
480 
481 	for (i = 0; i < n; i++) {
482 		sp = old_R[i];
483 		if (sp) {
484 			while (*sp >= 0)
485 				*temp_R[*sp++]++ = i;
486 		}
487 	}
488 
489 	free(temp_R);
490 
491 	return (new_R);
492 }
493 
494 
495 void
compute_FOLLOWS(void)496 compute_FOLLOWS(void)
497 {
498 	digraph(includes);
499 }
500 
501 void
compute_lookaheads(void)502 compute_lookaheads(void)
503 {
504 	int i, n;
505 	unsigned int *fp1, *fp2, *fp3;
506 	shorts *sp, *next;
507 	unsigned int *rowp;
508 
509 	rowp = LA;
510 	n = lookaheads[nstates];
511 	for (i = 0; i < n; i++) {
512 		fp3 = rowp + tokensetsize;
513 		for (sp = lookback[i]; sp; sp = sp->next) {
514 			fp1 = rowp;
515 			fp2 = F + tokensetsize * sp->value;
516 			while (fp1 < fp3)
517 				*fp1++ |= *fp2++;
518 		}
519 		rowp = fp3;
520 	}
521 
522 	for (i = 0; i < n; i++)
523 		for (sp = lookback[i]; sp; sp = next) {
524 			next = sp->next;
525 			free(sp);
526 		}
527 
528 	free(lookback);
529 	free(F);
530 }
531 
532 void
digraph(short ** relation)533 digraph(short **relation)
534 {
535 	int i;
536 
537 	infinity = ngotos + 2;
538 	INDEX = NEW2(ngotos + 1, short);
539 	VERTICES = NEW2(ngotos + 1, short);
540 	top = 0;
541 	R = relation;
542 
543 	memset(INDEX, 0, ngotos * sizeof(short));
544 	for (i = 0; i < ngotos; i++)
545 		if (R[i])
546 			traverse(i);
547 
548 	free(INDEX);
549 	free(VERTICES);
550 }
551 
552 
553 void
traverse(int i)554 traverse(int i)
555 {
556 	unsigned int *base, *fp1, *fp2, *fp3;
557 	int j, height;
558 	short *rp;
559 
560 	VERTICES[++top] = i;
561 	INDEX[i] = height = top;
562 
563 	base = F + i * tokensetsize;
564 	fp3 = base + tokensetsize;
565 
566 	rp = R[i];
567 	if (rp) {
568 		while ((j = *rp++) >= 0) {
569 			if (INDEX[j] == 0)
570 				traverse(j);
571 
572 			if (INDEX[i] > INDEX[j])
573 				INDEX[i] = INDEX[j];
574 
575 			fp1 = base;
576 			fp2 = F + j * tokensetsize;
577 
578 			while (fp1 < fp3)
579 				*fp1++ |= *fp2++;
580 		}
581 	}
582 
583 	if (INDEX[i] == height) {
584 		for (;;) {
585 			j = VERTICES[top--];
586 			INDEX[j] = infinity;
587 
588 			if (i == j)
589 				break;
590 
591 			fp1 = base;
592 			fp2 = F + j * tokensetsize;
593 
594 			while (fp1 < fp3)
595 				*fp2++ = *fp1++;
596 		}
597 	}
598 }
599