1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.proprietary.c% 6 */ 7 8 #ifndef lint 9 static char sccsid[] = "@(#)b.c 4.4 (Berkeley) 04/17/91"; 10 #endif /* not lint */ 11 12 #include "awk.def" 13 #include "stdio.h" 14 #include "awk.h" 15 16 extern node *op2(); 17 extern struct fa *cgotofn(); 18 #define MAXLIN 256 19 #define NCHARS 128 20 #define NSTATES 256 21 22 #define type(v) v->nobj 23 #define left(v) v->narg[0] 24 #define right(v) v->narg[1] 25 #define parent(v) v->nnext 26 27 #define LEAF case CCL: case NCCL: case CHAR: case DOT: 28 #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 29 30 /* encoding in tree nodes: 31 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value 32 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 33 binary (CAT, OR): left and right are children 34 parent contains pointer to parent 35 */ 36 37 struct fa { 38 int cch; 39 struct fa *st; 40 }; 41 42 int *state[NSTATES]; 43 int *foll[MAXLIN]; 44 char chars[MAXLIN]; 45 int setvec[MAXLIN]; 46 node *point[MAXLIN]; 47 48 int setcnt; 49 int line; 50 int maxfoll; /* index of highest foll[] entry set by cfoll() */ 51 52 53 struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ 54 node *p; 55 { 56 node *p1; 57 struct fa *fap; 58 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); 59 /* put DOT STAR in front of reg. exp. */ 60 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ 61 62 line = 0; 63 penter(p1); /* enter parent pointers and leaf indices */ 64 point[line] = p1; /* FINAL node */ 65 setvec[0] = 1; /* for initial DOT STAR */ 66 cfoll(p1); /* set up follow sets */ 67 fap = cgotofn(); 68 freetr(p1); /* add this when alloc works */ 69 return(fap); 70 } 71 72 penter(p) /* set up parent pointers and leaf indices */ 73 node *p; 74 { 75 switch(type(p)) { 76 LEAF 77 left(p) = (node *) line; 78 point[line++] = p; 79 break; 80 UNARY 81 penter(left(p)); 82 parent(left(p)) = p; 83 break; 84 case CAT: 85 case OR: 86 penter(left(p)); 87 penter(right(p)); 88 parent(left(p)) = p; 89 parent(right(p)) = p; 90 break; 91 default: 92 error(FATAL, "unknown type %d in penter\n", type(p)); 93 break; 94 } 95 } 96 97 freetr(p) /* free parse tree and follow sets */ 98 node *p; 99 { 100 switch(type(p)) { 101 LEAF 102 foll_free((int) left(p)); 103 xfree(p); 104 break; 105 UNARY 106 freetr(left(p)); 107 xfree(p); 108 break; 109 case CAT: 110 case OR: 111 freetr(left(p)); 112 freetr(right(p)); 113 xfree(p); 114 break; 115 default: 116 error(FATAL, "unknown type %d in freetr", type(p)); 117 break; 118 } 119 } 120 char *cclenter(p) 121 register char *p; 122 { 123 register i, c; 124 char *op; 125 126 op = p; 127 i = 0; 128 while ((c = *p++) != 0) { 129 if (c == '-' && i > 0 && chars[i-1] != 0) { 130 if (*p != 0) { 131 c = chars[i-1]; 132 while (c < *p) { 133 if (i >= MAXLIN) 134 overflo(); 135 chars[i++] = ++c; 136 } 137 p++; 138 continue; 139 } 140 } 141 if (i >= MAXLIN) 142 overflo(); 143 chars[i++] = c; 144 } 145 chars[i++] = '\0'; 146 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); 147 xfree(op); 148 return(tostring(chars)); 149 } 150 151 overflo() 152 { 153 error(FATAL, "regular expression too long\n"); 154 } 155 156 cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 157 register node *v; 158 { 159 register i; 160 int prev; 161 int *add(); 162 163 maxfoll = -1; 164 switch(type(v)) { 165 LEAF 166 setcnt = 0; 167 for (i=1; i<=line; i++) 168 setvec[i] = 0; 169 follow(v); 170 if (notin(foll, ( (int) left(v))-1, &prev)) { 171 foll[(int) left(v)] = add(setcnt); 172 } 173 else 174 foll[ (int) left(v)] = foll[prev]; 175 if ((int)left(v) > maxfoll) 176 maxfoll = (int)left(v); 177 break; 178 UNARY 179 cfoll(left(v)); 180 break; 181 case CAT: 182 case OR: 183 cfoll(left(v)); 184 cfoll(right(v)); 185 break; 186 default: 187 error(FATAL, "unknown type %d in cfoll", type(v)); 188 } 189 } 190 191 first(p) /* collects initially active leaves of p into setvec */ 192 register node *p; /* returns 0 or 1 depending on whether p matches empty string */ 193 { 194 register b; 195 196 switch(type(p)) { 197 LEAF 198 if (setvec[(int) left(p)] != 1) { 199 setvec[(int) left(p)] = 1; 200 setcnt++; 201 } 202 if (type(p) == CCL && (*(char *) right(p)) == '\0') 203 return(0); /* empty CCL */ 204 else return(1); 205 case FINAL: 206 case PLUS: 207 if (first(left(p)) == 0) return(0); 208 return(1); 209 case STAR: 210 case QUEST: 211 first(left(p)); 212 return(0); 213 case CAT: 214 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 215 return(1); 216 case OR: 217 b = first(right(p)); 218 if (first(left(p)) == 0 || b == 0) return(0); 219 return(1); 220 } 221 error(FATAL, "unknown type %d in first\n", type(p)); 222 return(-1); 223 } 224 225 follow(v) 226 node *v; /* collects leaves that can follow v into setvec */ 227 { 228 node *p; 229 230 if (type(v) == FINAL) 231 return; 232 p = parent(v); 233 switch (type(p)) { 234 case STAR: 235 case PLUS: first(v); 236 follow(p); 237 return; 238 239 case OR: 240 case QUEST: follow(p); 241 return; 242 243 case CAT: if (v == left(p)) { /* v is left child of p */ 244 if (first(right(p)) == 0) { 245 follow(p); 246 return; 247 } 248 } 249 else /* v is right child */ 250 follow(p); 251 return; 252 case FINAL: if (setvec[line] != 1) { 253 setvec[line] = 1; 254 setcnt++; 255 } 256 return; 257 } 258 } 259 260 member(c, s) /* is c in s? */ 261 register char c, *s; 262 { 263 while (*s) 264 if (c == *s++) 265 return(1); 266 return(0); 267 } 268 269 notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ 270 int **array; 271 int *prev; { 272 register i, j; 273 int *ptr; 274 for (i=0; i<=n; i++) { 275 ptr = array[i]; 276 if (*ptr == setcnt) { 277 for (j=0; j < setcnt; j++) 278 if (setvec[*(++ptr)] != 1) goto nxt; 279 *prev = i; 280 return(0); 281 } 282 nxt: ; 283 } 284 return(1); 285 } 286 287 int *add(n) { /* remember setvec */ 288 int *ptr, *p; 289 register i; 290 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) 291 overflo(); 292 *ptr = n; 293 dprintf("add(%d)\n", n, NULL, NULL); 294 for (i=1; i <= line; i++) 295 if (setvec[i] == 1) { 296 *(++ptr) = i; 297 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 298 } 299 dprintf("\n", NULL, NULL, NULL); 300 return(p); 301 } 302 303 struct fa *cgotofn() 304 { 305 register i, k; 306 register int *ptr; 307 char c; 308 char *p; 309 node *cp; 310 int j, n, s, ind, numtrans; 311 int finflg; 312 int curpos, num, prev; 313 struct fa *where[NSTATES]; 314 315 int fatab[257]; 316 struct fa *pfa; 317 318 char index[MAXLIN]; 319 char iposns[MAXLIN]; 320 int sposns[MAXLIN]; 321 int spmax, spinit; 322 323 char symbol[NCHARS]; 324 char isyms[NCHARS]; 325 char ssyms[NCHARS]; 326 int ssmax, ssinit; 327 328 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; 329 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; 330 setcnt = 0; 331 /* compute initial positions and symbols of state 0 */ 332 ssmax = 0; 333 ptr = state[0] = foll[0]; 334 spinit = *ptr; 335 for (i=0; i<spinit; i++) { 336 curpos = *(++ptr); 337 sposns[i] = curpos; 338 iposns[curpos] = 1; 339 cp = point[curpos]; 340 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 341 switch (type(cp)) { 342 case CHAR: 343 k = (int) right(cp); 344 if (isyms[k] != 1) { 345 isyms[k] = 1; 346 ssyms[ssmax++] = k; 347 } 348 break; 349 case DOT: 350 for (k=1; k<NCHARS; k++) { 351 if (k != HAT) { 352 if (isyms[k] != 1) { 353 isyms[k] = 1; 354 ssyms[ssmax++] = k; 355 } 356 } 357 } 358 break; 359 case CCL: 360 for (p = (char *) right(cp); *p; p++) { 361 if (*p != HAT) { 362 if (isyms[*p] != 1) { 363 isyms[*p] = 1; 364 ssyms[ssmax++] = *p; 365 } 366 } 367 } 368 break; 369 case NCCL: 370 for (k=1; k<NCHARS; k++) { 371 if (k != HAT && !member(k, (char *) right(cp))) { 372 if (isyms[k] != 1) { 373 isyms[k] = 1; 374 ssyms[ssmax++] = k; 375 } 376 } 377 } 378 } 379 } 380 ssinit = ssmax; 381 n = 0; 382 for (s=0; s<=n; s++) { 383 dprintf("s = %d\n", s, NULL, NULL); 384 ind = 0; 385 numtrans = 0; 386 finflg = 0; 387 if (*(state[s] + *state[s]) == line) { /* s final? */ 388 finflg = 1; 389 goto tenter; 390 } 391 spmax = spinit; 392 ssmax = ssinit; 393 ptr = state[s]; 394 num = *ptr; 395 for (i=0; i<num; i++) { 396 curpos = *(++ptr); 397 if (iposns[curpos] != 1 && index[curpos] != 1) { 398 index[curpos] = 1; 399 sposns[spmax++] = curpos; 400 } 401 cp = point[curpos]; 402 switch (type(cp)) { 403 case CHAR: 404 k = (int) right(cp); 405 if (isyms[k] == 0 && symbol[k] == 0) { 406 symbol[k] = 1; 407 ssyms[ssmax++] = k; 408 } 409 break; 410 case DOT: 411 for (k=1; k<NCHARS; k++) { 412 if (k != HAT) { 413 if (isyms[k] == 0 && symbol[k] == 0) { 414 symbol[k] = 1; 415 ssyms[ssmax++] = k; 416 } 417 } 418 } 419 break; 420 case CCL: 421 for (p = (char *) right(cp); *p; p++) { 422 if (*p != HAT) { 423 if (isyms[*p] == 0 && symbol[*p] == 0) { 424 symbol[*p] = 1; 425 ssyms[ssmax++] = *p; 426 } 427 } 428 } 429 break; 430 case NCCL: 431 for (k=1; k<NCHARS; k++) { 432 if (k != HAT && !member(k, (char *) right(cp))) { 433 if (isyms[k] == 0 && symbol[k] == 0) { 434 symbol[k] = 1; 435 ssyms[ssmax++] = k; 436 } 437 } 438 } 439 } 440 } 441 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ 442 c = ssyms[j]; 443 symbol[c] = 0; 444 setcnt = 0; 445 for (k=0; k<=line; k++) setvec[k] = 0; 446 for (i=0; i<spmax; i++) { 447 index[sposns[i]] = 0; 448 cp = point[sposns[i]]; 449 if ((k = type(cp)) != FINAL) 450 if (k == CHAR && c == (int) right(cp) 451 || k == DOT 452 || k == CCL && member(c, (char *) right(cp)) 453 || k == NCCL && !member(c, (char *) right(cp))) { 454 ptr = foll[sposns[i]]; 455 num = *ptr; 456 for (k=0; k<num; k++) { 457 if (setvec[*(++ptr)] != 1 458 && iposns[*ptr] != 1) { 459 setvec[*ptr] = 1; 460 setcnt++; 461 } 462 } 463 } 464 } /* end nextstate */ 465 if (notin(state, n, &prev)) { 466 if (n >= NSTATES) { 467 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 468 overflo(); 469 } 470 state[++n] = add(setcnt); 471 dprintf(" delta(%d,%o) = %d", s,c,n); 472 dprintf(", ind = %d\n", ind+1, NULL, NULL); 473 fatab[++ind] = c; 474 fatab[++ind] = n; 475 numtrans++; 476 } 477 else { 478 if (prev != 0) { 479 dprintf(" delta(%d,%o) = %d", s,c,prev); 480 dprintf(", ind = %d\n", ind+1, NULL, NULL); 481 fatab[++ind] = c; 482 fatab[++ind] = prev; 483 numtrans++; 484 } 485 } 486 } 487 tenter: 488 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) 489 overflo(); 490 where[s] = pfa; 491 if (finflg) 492 pfa->cch = -1; /* s is a final state */ 493 else 494 pfa->cch = numtrans; 495 pfa->st = 0; 496 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { 497 pfa->cch = fatab[2*i-1]; 498 pfa->st = (struct fa *) fatab[2*i]; 499 } 500 } 501 for (i=0; i<=n; i++) { 502 /* N.b. state[0] == foll[0], not separately allocated */ 503 if (i>0) 504 xfree(state[i]); /* free state[i] */ 505 pfa = where[i]; 506 pfa->st = where[0]; 507 dprintf("state %d: (%o)\n", i, pfa, NULL); 508 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); 509 for (k=1; k<=pfa->cch; k++) { 510 (pfa+k)->st = where[ (int) (pfa+k)->st]; 511 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); 512 } 513 } 514 pfa = where[0]; 515 if ((num = pfa->cch) < 0) 516 return(where[0]); 517 for (pfa += num; num; num--, pfa--) 518 if (pfa->cch == HAT) { 519 return(pfa->st); 520 } 521 return(where[0]); 522 } 523 524 match(pfa, p) 525 register struct fa *pfa; 526 register char *p; 527 { 528 register count; 529 char c; 530 if (p == 0) return(0); 531 if (pfa->cch == 1) { /* fast test for first character, if possible */ 532 c = (++pfa)->cch; 533 do 534 if (c == *p) { 535 p++; 536 pfa = pfa->st; 537 goto adv; 538 } 539 while (*p++ != 0); 540 return(0); 541 } 542 adv: if ((count = pfa->cch) < 0) return(1); 543 do { 544 for (pfa += count; count; count--, pfa--) 545 if (pfa->cch == *p) { 546 break; 547 } 548 pfa = pfa->st; 549 if ((count = pfa->cch) < 0) return(1); 550 } while(*p++ != 0); 551 return(0); 552 } 553 554 /* 555 * Free foll[i], taking into account identical foll[] entries. 556 * This is necessary because cfoll() uses the same physical follow set for 557 * several foll[] entries when the set is identical. Called by freetr(). 558 */ 559 foll_free(i) 560 int i; 561 { 562 register int j; 563 int *p = foll[i]; 564 if (p==NULL) return; 565 for (j=0; j<=maxfoll; j++) 566 if (foll[j]==p) foll[j]=NULL; 567 xfree(p); 568 } 569