1 /* b.c 4.1 82/05/07 */ 2 3 #include "awk.def" 4 #include "stdio.h" 5 #include "awk.h" 6 7 extern node *op2(); 8 extern struct fa *cgotofn(); 9 #define MAXLIN 256 10 #define NCHARS 128 11 #define NSTATES 256 12 13 #define type(v) v->nobj 14 #define left(v) v->narg[0] 15 #define right(v) v->narg[1] 16 #define parent(v) v->nnext 17 18 #define LEAF case CCL: case NCCL: case CHAR: case DOT: 19 #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 20 21 /* encoding in tree nodes: 22 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value 23 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 24 binary (CAT, OR): left and right are children 25 parent contains pointer to parent 26 */ 27 28 struct fa { 29 int cch; 30 struct fa *st; 31 }; 32 33 int *state[NSTATES]; 34 int *foll[MAXLIN]; 35 char chars[MAXLIN]; 36 int setvec[MAXLIN]; 37 node *point[MAXLIN]; 38 39 int setcnt; 40 int line; 41 42 43 struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ 44 node *p; 45 { 46 node *p1; 47 struct fa *fap; 48 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); 49 /* put DOT STAR in front of reg. exp. */ 50 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ 51 52 line = 0; 53 penter(p1); /* enter parent pointers and leaf indices */ 54 point[line] = p1; /* FINAL node */ 55 setvec[0] = 1; /* for initial DOT STAR */ 56 cfoll(p1); /* set up follow sets */ 57 fap = cgotofn(); 58 freetr(p1); /* add this when alloc works */ 59 return(fap); 60 } 61 62 penter(p) /* set up parent pointers and leaf indices */ 63 node *p; 64 { 65 switch(type(p)) { 66 LEAF 67 left(p) = (node *) line; 68 point[line++] = p; 69 break; 70 UNARY 71 penter(left(p)); 72 parent(left(p)) = p; 73 break; 74 case CAT: 75 case OR: 76 penter(left(p)); 77 penter(right(p)); 78 parent(left(p)) = p; 79 parent(right(p)) = p; 80 break; 81 default: 82 error(FATAL, "unknown type %d in penter\n", type(p)); 83 break; 84 } 85 } 86 87 freetr(p) /* free parse tree and follow sets */ 88 node *p; 89 { 90 switch(type(p)) { 91 LEAF 92 xfree(foll[(int) left(p)]); 93 xfree(p); 94 break; 95 UNARY 96 freetr(left(p)); 97 xfree(p); 98 break; 99 case CAT: 100 case OR: 101 freetr(left(p)); 102 freetr(right(p)); 103 xfree(p); 104 break; 105 default: 106 error(FATAL, "unknown type %d in freetr", type(p)); 107 break; 108 } 109 } 110 char *cclenter(p) 111 register char *p; 112 { 113 register i, c; 114 char *op; 115 116 op = p; 117 i = 0; 118 while ((c = *p++) != 0) { 119 if (c == '-' && i > 0 && chars[i-1] != 0) { 120 if (*p != 0) { 121 c = chars[i-1]; 122 while (c < *p) { 123 if (i >= MAXLIN) 124 overflo(); 125 chars[i++] = ++c; 126 } 127 p++; 128 continue; 129 } 130 } 131 if (i >= MAXLIN) 132 overflo(); 133 chars[i++] = c; 134 } 135 chars[i++] = '\0'; 136 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); 137 xfree(op); 138 return(tostring(chars)); 139 } 140 141 overflo() 142 { 143 error(FATAL, "regular expression too long\n"); 144 } 145 146 cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 147 register node *v; 148 { 149 register i; 150 int prev; 151 int *add(); 152 153 switch(type(v)) { 154 LEAF 155 setcnt = 0; 156 for (i=1; i<=line; i++) 157 setvec[i] = 0; 158 follow(v); 159 if (notin(foll, ( (int) left(v))-1, &prev)) { 160 foll[(int) left(v)] = add(setcnt); 161 } 162 else 163 foll[ (int) left(v)] = foll[prev]; 164 break; 165 UNARY 166 cfoll(left(v)); 167 break; 168 case CAT: 169 case OR: 170 cfoll(left(v)); 171 cfoll(right(v)); 172 break; 173 default: 174 error(FATAL, "unknown type %d in cfoll", type(v)); 175 } 176 } 177 178 first(p) /* collects initially active leaves of p into setvec */ 179 register node *p; /* returns 0 or 1 depending on whether p matches empty string */ 180 { 181 register b; 182 183 switch(type(p)) { 184 LEAF 185 if (setvec[(int) left(p)] != 1) { 186 setvec[(int) left(p)] = 1; 187 setcnt++; 188 } 189 if (type(p) == CCL && (*(char *) right(p)) == '\0') 190 return(0); /* empty CCL */ 191 else return(1); 192 case FINAL: 193 case PLUS: 194 if (first(left(p)) == 0) return(0); 195 return(1); 196 case STAR: 197 case QUEST: 198 first(left(p)); 199 return(0); 200 case CAT: 201 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 202 return(1); 203 case OR: 204 b = first(right(p)); 205 if (first(left(p)) == 0 || b == 0) return(0); 206 return(1); 207 } 208 error(FATAL, "unknown type %d in first\n", type(p)); 209 return(-1); 210 } 211 212 follow(v) 213 node *v; /* collects leaves that can follow v into setvec */ 214 { 215 node *p; 216 217 if (type(v) == FINAL) 218 return; 219 p = parent(v); 220 switch (type(p)) { 221 case STAR: 222 case PLUS: first(v); 223 follow(p); 224 return; 225 226 case OR: 227 case QUEST: follow(p); 228 return; 229 230 case CAT: if (v == left(p)) { /* v is left child of p */ 231 if (first(right(p)) == 0) { 232 follow(p); 233 return; 234 } 235 } 236 else /* v is right child */ 237 follow(p); 238 return; 239 case FINAL: if (setvec[line] != 1) { 240 setvec[line] = 1; 241 setcnt++; 242 } 243 return; 244 } 245 } 246 247 member(c, s) /* is c in s? */ 248 register char c, *s; 249 { 250 while (*s) 251 if (c == *s++) 252 return(1); 253 return(0); 254 } 255 256 notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ 257 int **array; 258 int *prev; { 259 register i, j; 260 int *ptr; 261 for (i=0; i<=n; i++) { 262 ptr = array[i]; 263 if (*ptr == setcnt) { 264 for (j=0; j < setcnt; j++) 265 if (setvec[*(++ptr)] != 1) goto nxt; 266 *prev = i; 267 return(0); 268 } 269 nxt: ; 270 } 271 return(1); 272 } 273 274 int *add(n) { /* remember setvec */ 275 int *ptr, *p; 276 register i; 277 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) 278 overflo(); 279 *ptr = n; 280 dprintf("add(%d)\n", n, NULL, NULL); 281 for (i=1; i <= line; i++) 282 if (setvec[i] == 1) { 283 *(++ptr) = i; 284 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 285 } 286 dprintf("\n", NULL, NULL, NULL); 287 return(p); 288 } 289 290 struct fa *cgotofn() 291 { 292 register i, k; 293 register int *ptr; 294 char c; 295 char *p; 296 node *cp; 297 int j, n, s, ind, numtrans; 298 int finflg; 299 int curpos, num, prev; 300 struct fa *where[NSTATES]; 301 302 int fatab[257]; 303 struct fa *pfa; 304 305 char index[MAXLIN]; 306 char iposns[MAXLIN]; 307 int sposns[MAXLIN]; 308 int spmax, spinit; 309 310 char symbol[NCHARS]; 311 char isyms[NCHARS]; 312 char ssyms[NCHARS]; 313 int ssmax, ssinit; 314 315 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; 316 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; 317 setcnt = 0; 318 /* compute initial positions and symbols of state 0 */ 319 ssmax = 0; 320 ptr = state[0] = foll[0]; 321 spinit = *ptr; 322 for (i=0; i<spinit; i++) { 323 curpos = *(++ptr); 324 sposns[i] = curpos; 325 iposns[curpos] = 1; 326 cp = point[curpos]; 327 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 328 switch (type(cp)) { 329 case CHAR: 330 k = (int) right(cp); 331 if (isyms[k] != 1) { 332 isyms[k] = 1; 333 ssyms[ssmax++] = k; 334 } 335 break; 336 case DOT: 337 for (k=1; k<NCHARS; k++) { 338 if (k != HAT) { 339 if (isyms[k] != 1) { 340 isyms[k] = 1; 341 ssyms[ssmax++] = k; 342 } 343 } 344 } 345 break; 346 case CCL: 347 for (p = (char *) right(cp); *p; p++) { 348 if (*p != HAT) { 349 if (isyms[*p] != 1) { 350 isyms[*p] = 1; 351 ssyms[ssmax++] = *p; 352 } 353 } 354 } 355 break; 356 case NCCL: 357 for (k=1; k<NCHARS; k++) { 358 if (k != HAT && !member(k, (char *) right(cp))) { 359 if (isyms[k] != 1) { 360 isyms[k] = 1; 361 ssyms[ssmax++] = k; 362 } 363 } 364 } 365 } 366 } 367 ssinit = ssmax; 368 n = 0; 369 for (s=0; s<=n; s++) { 370 dprintf("s = %d\n", s, NULL, NULL); 371 ind = 0; 372 numtrans = 0; 373 finflg = 0; 374 if (*(state[s] + *state[s]) == line) { /* s final? */ 375 finflg = 1; 376 goto tenter; 377 } 378 spmax = spinit; 379 ssmax = ssinit; 380 ptr = state[s]; 381 num = *ptr; 382 for (i=0; i<num; i++) { 383 curpos = *(++ptr); 384 if (iposns[curpos] != 1 && index[curpos] != 1) { 385 index[curpos] = 1; 386 sposns[spmax++] = curpos; 387 } 388 cp = point[curpos]; 389 switch (type(cp)) { 390 case CHAR: 391 k = (int) right(cp); 392 if (isyms[k] == 0 && symbol[k] == 0) { 393 symbol[k] = 1; 394 ssyms[ssmax++] = k; 395 } 396 break; 397 case DOT: 398 for (k=1; k<NCHARS; k++) { 399 if (k != HAT) { 400 if (isyms[k] == 0 && symbol[k] == 0) { 401 symbol[k] = 1; 402 ssyms[ssmax++] = k; 403 } 404 } 405 } 406 break; 407 case CCL: 408 for (p = (char *) right(cp); *p; p++) { 409 if (*p != HAT) { 410 if (isyms[*p] == 0 && symbol[*p] == 0) { 411 symbol[*p] = 1; 412 ssyms[ssmax++] = *p; 413 } 414 } 415 } 416 break; 417 case NCCL: 418 for (k=1; k<NCHARS; k++) { 419 if (k != HAT && !member(k, (char *) right(cp))) { 420 if (isyms[k] == 0 && symbol[k] == 0) { 421 symbol[k] = 1; 422 ssyms[ssmax++] = k; 423 } 424 } 425 } 426 } 427 } 428 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ 429 c = ssyms[j]; 430 symbol[c] = 0; 431 setcnt = 0; 432 for (k=0; k<=line; k++) setvec[k] = 0; 433 for (i=0; i<spmax; i++) { 434 index[sposns[i]] = 0; 435 cp = point[sposns[i]]; 436 if ((k = type(cp)) != FINAL) 437 if (k == CHAR && c == (int) right(cp) 438 || k == DOT 439 || k == CCL && member(c, (char *) right(cp)) 440 || k == NCCL && !member(c, (char *) right(cp))) { 441 ptr = foll[sposns[i]]; 442 num = *ptr; 443 for (k=0; k<num; k++) { 444 if (setvec[*(++ptr)] != 1 445 && iposns[*ptr] != 1) { 446 setvec[*ptr] = 1; 447 setcnt++; 448 } 449 } 450 } 451 } /* end nextstate */ 452 if (notin(state, n, &prev)) { 453 if (n >= NSTATES) { 454 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 455 overflo(); 456 } 457 state[++n] = add(setcnt); 458 dprintf(" delta(%d,%o) = %d", s,c,n); 459 dprintf(", ind = %d\n", ind+1, NULL, NULL); 460 fatab[++ind] = c; 461 fatab[++ind] = n; 462 numtrans++; 463 } 464 else { 465 if (prev != 0) { 466 dprintf(" delta(%d,%o) = %d", s,c,prev); 467 dprintf(", ind = %d\n", ind+1, NULL, NULL); 468 fatab[++ind] = c; 469 fatab[++ind] = prev; 470 numtrans++; 471 } 472 } 473 } 474 tenter: 475 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) 476 overflo(); 477 where[s] = pfa; 478 if (finflg) 479 pfa->cch = -1; /* s is a final state */ 480 else 481 pfa->cch = numtrans; 482 pfa->st = 0; 483 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { 484 pfa->cch = fatab[2*i-1]; 485 pfa->st = (struct fa *) fatab[2*i]; 486 } 487 } 488 for (i=0; i<=n; i++) { 489 xfree(state[i]); /* free state[i] */ 490 pfa = where[i]; 491 pfa->st = where[0]; 492 dprintf("state %d: (%o)\n", i, pfa, NULL); 493 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); 494 for (k=1; k<=pfa->cch; k++) { 495 (pfa+k)->st = where[ (int) (pfa+k)->st]; 496 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); 497 } 498 } 499 pfa = where[0]; 500 if ((num = pfa->cch) < 0) 501 return(where[0]); 502 for (pfa += num; num; num--, pfa--) 503 if (pfa->cch == HAT) { 504 return(pfa->st); 505 } 506 return(where[0]); 507 } 508 509 match(pfa, p) 510 register struct fa *pfa; 511 register char *p; 512 { 513 register count; 514 char c; 515 if (p == 0) return(0); 516 if (pfa->cch == 1) { /* fast test for first character, if possible */ 517 c = (++pfa)->cch; 518 do 519 if (c == *p) { 520 p++; 521 pfa = pfa->st; 522 goto adv; 523 } 524 while (*p++ != 0); 525 return(0); 526 } 527 adv: if ((count = pfa->cch) < 0) return(1); 528 do { 529 for (pfa += count; count; count--, pfa--) 530 if (pfa->cch == *p) { 531 break; 532 } 533 pfa = pfa->st; 534 if ((count = pfa->cch) < 0) return(1); 535 } while(*p++ != 0); 536 return(0); 537 } 538