1 /* $OpenBSD: lalr.c,v 1.19 2017/05/25 20:11:03 tedu Exp $ */ 2 /* $NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989 The Regents of the University of California. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Robert Paul Corbett. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include "defs.h" 37 38 typedef struct shorts { 39 struct shorts *next; 40 short value; 41 } shorts; 42 43 int tokensetsize; 44 short *lookaheads; 45 short *LAruleno; 46 unsigned *LA; 47 short *accessing_symbol; 48 core **state_table; 49 shifts **shift_table; 50 reductions **reduction_table; 51 short *goto_map; 52 short *from_state; 53 short *to_state; 54 55 short **transpose(short **, int); 56 void set_state_table(void); 57 void set_accessing_symbol(void); 58 void set_shift_table(void); 59 void set_reduction_table(void); 60 void set_maxrhs(void); 61 void initialize_LA(void); 62 void set_goto_map(void); 63 void initialize_F(void); 64 void build_relations(void); 65 void compute_FOLLOWS(void); 66 void compute_lookaheads(void); 67 int map_goto(int, int); 68 void digraph(short **); 69 void add_lookback_edge(int, int, int); 70 void traverse(int); 71 72 static int infinity; 73 static int maxrhs; 74 static int ngotos; 75 static unsigned *F; 76 static short **includes; 77 static shorts **lookback; 78 static short **R; 79 static short *INDEX; 80 static short *VERTICES; 81 static int top; 82 83 void 84 lalr(void) 85 { 86 tokensetsize = WORDSIZE(ntokens); 87 88 set_state_table(); 89 set_accessing_symbol(); 90 set_shift_table(); 91 set_reduction_table(); 92 set_maxrhs(); 93 initialize_LA(); 94 set_goto_map(); 95 initialize_F(); 96 build_relations(); 97 compute_FOLLOWS(); 98 compute_lookaheads(); 99 free_derives(); 100 free_nullable(); 101 } 102 103 104 void 105 set_state_table(void) 106 { 107 core *sp; 108 109 state_table = NEW2(nstates, core *); 110 for (sp = first_state; sp; sp = sp->next) 111 state_table[sp->number] = sp; 112 } 113 114 115 void 116 set_accessing_symbol(void) 117 { 118 core *sp; 119 120 accessing_symbol = NEW2(nstates, short); 121 for (sp = first_state; sp; sp = sp->next) 122 accessing_symbol[sp->number] = sp->accessing_symbol; 123 } 124 125 126 void 127 set_shift_table(void) 128 { 129 shifts *sp; 130 131 shift_table = NEW2(nstates, shifts *); 132 for (sp = first_shift; sp; sp = sp->next) 133 shift_table[sp->number] = sp; 134 } 135 136 137 void 138 set_reduction_table(void) 139 { 140 reductions *rp; 141 142 reduction_table = NEW2(nstates, reductions *); 143 for (rp = first_reduction; rp; rp = rp->next) 144 reduction_table[rp->number] = rp; 145 } 146 147 148 void 149 set_maxrhs(void) 150 { 151 short *itemp, *item_end; 152 int length, max; 153 154 length = 0; 155 max = 0; 156 item_end = ritem + nitems; 157 for (itemp = ritem; itemp < item_end; itemp++) { 158 if (*itemp >= 0) { 159 length++; 160 } else { 161 if (length > max) max = length; 162 length = 0; 163 } 164 } 165 166 maxrhs = max; 167 } 168 169 170 void 171 initialize_LA(void) 172 { 173 int i, j, k; 174 reductions *rp; 175 176 lookaheads = NEW2(nstates + 1, short); 177 178 k = 0; 179 for (i = 0; i < nstates; i++) { 180 lookaheads[i] = k; 181 rp = reduction_table[i]; 182 if (rp) 183 k += rp->nreds; 184 } 185 lookaheads[nstates] = k; 186 187 LA = NEW2(k * tokensetsize, unsigned); 188 LAruleno = NEW2(k, short); 189 lookback = NEW2(k, shorts *); 190 191 k = 0; 192 for (i = 0; i < nstates; i++) { 193 rp = reduction_table[i]; 194 if (rp) { 195 for (j = 0; j < rp->nreds; j++) { 196 LAruleno[k] = rp->rules[j]; 197 k++; 198 } 199 } 200 } 201 } 202 203 void 204 set_goto_map(void) 205 { 206 shifts *sp; 207 int i, k, symbol; 208 int state1, state2; 209 short *temp_map; 210 211 goto_map = NEW2(nvars + 1, short) - ntokens; 212 temp_map = NEW2(nvars + 1, short) - ntokens; 213 214 ngotos = 0; 215 for (sp = first_shift; sp; sp = sp->next) { 216 for (i = sp->nshifts - 1; i >= 0; i--) { 217 symbol = accessing_symbol[sp->shift[i]]; 218 219 if (ISTOKEN(symbol)) break; 220 221 if (ngotos == MAXSHORT) 222 fatal("too many gotos"); 223 224 ngotos++; 225 goto_map[symbol]++; 226 } 227 } 228 229 k = 0; 230 for (i = ntokens; i < nsyms; i++) { 231 temp_map[i] = k; 232 k += goto_map[i]; 233 } 234 235 for (i = ntokens; i < nsyms; i++) 236 goto_map[i] = temp_map[i]; 237 238 goto_map[nsyms] = ngotos; 239 temp_map[nsyms] = ngotos; 240 241 from_state = NEW2(ngotos, short); 242 to_state = NEW2(ngotos, short); 243 244 for (sp = first_shift; sp; sp = sp->next) { 245 state1 = sp->number; 246 for (i = sp->nshifts - 1; i >= 0; i--) { 247 state2 = sp->shift[i]; 248 symbol = accessing_symbol[state2]; 249 250 if (ISTOKEN(symbol)) break; 251 252 k = temp_map[symbol]++; 253 from_state[k] = state1; 254 to_state[k] = state2; 255 } 256 } 257 258 free(temp_map + ntokens); 259 } 260 261 262 263 /* Map_goto maps a state/symbol pair into its numeric representation. */ 264 265 int 266 map_goto(int state, int symbol) 267 { 268 int high, low, middle, s; 269 270 low = goto_map[symbol]; 271 high = goto_map[symbol + 1]; 272 273 for (;;) { 274 assert(low <= high); 275 middle = (low + high) >> 1; 276 s = from_state[middle]; 277 if (s == state) 278 return (middle); 279 else if (s < state) 280 low = middle + 1; 281 else 282 high = middle - 1; 283 } 284 } 285 286 287 void 288 initialize_F(void) 289 { 290 int i, j, k; 291 shifts *sp; 292 short *edge, *rp, **reads; 293 unsigned int *rowp; 294 int nedges, stateno, symbol, nwords; 295 296 nwords = ngotos * tokensetsize; 297 F = NEW2(nwords, unsigned); 298 299 reads = NEW2(ngotos, short *); 300 edge = NEW2(ngotos + 1, short); 301 nedges = 0; 302 303 rowp = F; 304 for (i = 0; i < ngotos; i++) { 305 stateno = to_state[i]; 306 sp = shift_table[stateno]; 307 308 if (sp) { 309 k = sp->nshifts; 310 311 for (j = 0; j < k; j++) { 312 symbol = accessing_symbol[sp->shift[j]]; 313 if (ISVAR(symbol)) 314 break; 315 SETBIT(rowp, symbol); 316 } 317 318 for (; j < k; j++) { 319 symbol = accessing_symbol[sp->shift[j]]; 320 if (nullable[symbol]) 321 edge[nedges++] = map_goto(stateno, symbol); 322 } 323 324 if (nedges) { 325 reads[i] = rp = NEW2(nedges + 1, short); 326 327 for (j = 0; j < nedges; j++) 328 rp[j] = edge[j]; 329 330 rp[nedges] = -1; 331 nedges = 0; 332 } 333 } 334 335 rowp += tokensetsize; 336 } 337 338 SETBIT(F, 0); 339 digraph(reads); 340 341 for (i = 0; i < ngotos; i++) 342 free(reads[i]); 343 344 free(reads); 345 free(edge); 346 } 347 348 349 void 350 build_relations(void) 351 { 352 int i, j, k; 353 short *rulep, *rp; 354 shifts *sp; 355 int length, nedges, done; 356 int state1, stateno, symbol1, symbol2; 357 short *shortp, *edge, *states; 358 short **new_includes; 359 360 includes = NEW2(ngotos, short *); 361 edge = NEW2(ngotos + 1, short); 362 states = NEW2(maxrhs + 1, short); 363 364 for (i = 0; i < ngotos; i++) { 365 nedges = 0; 366 state1 = from_state[i]; 367 symbol1 = accessing_symbol[to_state[i]]; 368 369 for (rulep = derives[symbol1]; *rulep >= 0; rulep++) { 370 length = 1; 371 states[0] = state1; 372 stateno = state1; 373 374 for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) { 375 symbol2 = *rp; 376 sp = shift_table[stateno]; 377 k = sp->nshifts; 378 379 for (j = 0; j < k; j++) { 380 stateno = sp->shift[j]; 381 if (accessing_symbol[stateno] == symbol2) 382 break; 383 } 384 385 states[length++] = stateno; 386 } 387 388 add_lookback_edge(stateno, *rulep, i); 389 390 length--; 391 done = 0; 392 while (!done) { 393 done = 1; 394 rp--; 395 if (ISVAR(*rp)) { 396 stateno = states[--length]; 397 edge[nedges++] = map_goto(stateno, *rp); 398 if (nullable[*rp] && length > 0) 399 done = 0; 400 } 401 } 402 } 403 404 if (nedges) { 405 includes[i] = shortp = NEW2(nedges + 1, short); 406 for (j = 0; j < nedges; j++) 407 shortp[j] = edge[j]; 408 shortp[nedges] = -1; 409 } 410 } 411 412 new_includes = transpose(includes, ngotos); 413 414 for (i = 0; i < ngotos; i++) 415 free(includes[i]); 416 417 free(includes); 418 419 includes = new_includes; 420 421 free(edge); 422 free(states); 423 } 424 425 void 426 add_lookback_edge(int stateno, int ruleno, int gotono) 427 { 428 int i, k, found; 429 shorts *sp; 430 431 i = lookaheads[stateno]; 432 k = lookaheads[stateno + 1]; 433 found = 0; 434 while (!found && i < k) { 435 if (LAruleno[i] == ruleno) 436 found = 1; 437 else 438 ++i; 439 } 440 assert(found); 441 442 sp = NEW(shorts); 443 sp->next = lookback[i]; 444 sp->value = gotono; 445 lookback[i] = sp; 446 } 447 448 449 450 short ** 451 transpose(short **old_R, int n) 452 { 453 short **new_R, **temp_R, *nedges, *sp; 454 int i, k; 455 456 nedges = NEW2(n, short); 457 458 for (i = 0; i < n; i++) { 459 sp = old_R[i]; 460 if (sp) { 461 while (*sp >= 0) 462 nedges[*sp++]++; 463 } 464 } 465 466 new_R = NEW2(n, short *); 467 temp_R = NEW2(n, short *); 468 469 for (i = 0; i < n; i++) { 470 k = nedges[i]; 471 if (k > 0) { 472 sp = NEW2(k + 1, short); 473 new_R[i] = sp; 474 temp_R[i] = sp; 475 sp[k] = -1; 476 } 477 } 478 479 free(nedges); 480 481 for (i = 0; i < n; i++) { 482 sp = old_R[i]; 483 if (sp) { 484 while (*sp >= 0) 485 *temp_R[*sp++]++ = i; 486 } 487 } 488 489 free(temp_R); 490 491 return (new_R); 492 } 493 494 495 void 496 compute_FOLLOWS(void) 497 { 498 digraph(includes); 499 } 500 501 void 502 compute_lookaheads(void) 503 { 504 int i, n; 505 unsigned int *fp1, *fp2, *fp3; 506 shorts *sp, *next; 507 unsigned int *rowp; 508 509 rowp = LA; 510 n = lookaheads[nstates]; 511 for (i = 0; i < n; i++) { 512 fp3 = rowp + tokensetsize; 513 for (sp = lookback[i]; sp; sp = sp->next) { 514 fp1 = rowp; 515 fp2 = F + tokensetsize * sp->value; 516 while (fp1 < fp3) 517 *fp1++ |= *fp2++; 518 } 519 rowp = fp3; 520 } 521 522 for (i = 0; i < n; i++) 523 for (sp = lookback[i]; sp; sp = next) { 524 next = sp->next; 525 free(sp); 526 } 527 528 free(lookback); 529 free(F); 530 } 531 532 void 533 digraph(short **relation) 534 { 535 int i; 536 537 infinity = ngotos + 2; 538 INDEX = NEW2(ngotos + 1, short); 539 VERTICES = NEW2(ngotos + 1, short); 540 top = 0; 541 R = relation; 542 543 memset(INDEX, 0, ngotos * sizeof(short)); 544 for (i = 0; i < ngotos; i++) 545 if (R[i]) 546 traverse(i); 547 548 free(INDEX); 549 free(VERTICES); 550 } 551 552 553 void 554 traverse(int i) 555 { 556 unsigned int *base, *fp1, *fp2, *fp3; 557 int j, height; 558 short *rp; 559 560 VERTICES[++top] = i; 561 INDEX[i] = height = top; 562 563 base = F + i * tokensetsize; 564 fp3 = base + tokensetsize; 565 566 rp = R[i]; 567 if (rp) { 568 while ((j = *rp++) >= 0) { 569 if (INDEX[j] == 0) 570 traverse(j); 571 572 if (INDEX[i] > INDEX[j]) 573 INDEX[i] = INDEX[j]; 574 575 fp1 = base; 576 fp2 = F + j * tokensetsize; 577 578 while (fp1 < fp3) 579 *fp1++ |= *fp2++; 580 } 581 } 582 583 if (INDEX[i] == height) { 584 for (;;) { 585 j = VERTICES[top--]; 586 INDEX[j] = infinity; 587 588 if (i == j) 589 break; 590 591 fp1 = base; 592 fp2 = F + j * tokensetsize; 593 594 while (fp1 < fp3) 595 *fp2++ = *fp1++; 596 } 597 } 598 } 599