1 /* $NetBSD: dfa.c,v 1.1.1.2 2013/04/06 14:05:42 christos Exp $ */ 2 3 /* dfa - DFA construction routines */ 4 5 /* Copyright (c) 1990 The Regents of the University of California. */ 6 /* All rights reserved. */ 7 8 /* This code is derived from software contributed to Berkeley by */ 9 /* Vern Paxson. */ 10 11 /* The United States Government has rights in this work pursuant */ 12 /* to contract no. DE-AC03-76SF00098 between the United States */ 13 /* Department of Energy and the University of California. */ 14 15 /* Redistribution and use in source and binary forms, with or without */ 16 /* modification, are permitted provided that the following conditions */ 17 /* are met: */ 18 19 /* 1. Redistributions of source code must retain the above copyright */ 20 /* notice, this list of conditions and the following disclaimer. */ 21 /* 2. Redistributions in binary form must reproduce the above copyright */ 22 /* notice, this list of conditions and the following disclaimer in the */ 23 /* documentation and/or other materials provided with the distribution. */ 24 25 /* Neither the name of the University nor the names of its contributors */ 26 /* may be used to endorse or promote products derived from this software */ 27 /* without specific prior written permission. */ 28 29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 32 /* PURPOSE. */ 33 34 #include "flexdef.h" 35 #include "tables.h" 36 37 /* declare functions that have forward references */ 38 39 void dump_associated_rules PROTO ((FILE *, int)); 40 void dump_transitions PROTO ((FILE *, int[])); 41 void sympartition PROTO ((int[], int, int[], int[])); 42 int symfollowset PROTO ((int[], int, int, int[])); 43 44 45 /* check_for_backing_up - check a DFA state for backing up 46 * 47 * synopsis 48 * void check_for_backing_up( int ds, int state[numecs] ); 49 * 50 * ds is the number of the state to check and state[] is its out-transitions, 51 * indexed by equivalence class. 52 */ 53 54 void check_for_backing_up (ds, state) 55 int ds; 56 int state[]; 57 { 58 if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */ 59 ++num_backing_up; 60 61 if (backing_up_report) { 62 fprintf (backing_up_file, 63 _("State #%d is non-accepting -\n"), ds); 64 65 /* identify the state */ 66 dump_associated_rules (backing_up_file, ds); 67 68 /* Now identify it further using the out- and 69 * jam-transitions. 70 */ 71 dump_transitions (backing_up_file, state); 72 73 putc ('\n', backing_up_file); 74 } 75 } 76 } 77 78 79 /* check_trailing_context - check to see if NFA state set constitutes 80 * "dangerous" trailing context 81 * 82 * synopsis 83 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 84 * int accset[nacc+1], int nacc ); 85 * 86 * NOTES 87 * Trailing context is "dangerous" if both the head and the trailing 88 * part are of variable size \and/ there's a DFA state which contains 89 * both an accepting state for the head part of the rule and NFA states 90 * which occur after the beginning of the trailing context. 91 * 92 * When such a rule is matched, it's impossible to tell if having been 93 * in the DFA state indicates the beginning of the trailing context or 94 * further-along scanning of the pattern. In these cases, a warning 95 * message is issued. 96 * 97 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 98 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 99 */ 100 101 void check_trailing_context (nfa_states, num_states, accset, nacc) 102 int *nfa_states, num_states; 103 int *accset; 104 int nacc; 105 { 106 register int i, j; 107 108 for (i = 1; i <= num_states; ++i) { 109 int ns = nfa_states[i]; 110 register int type = state_type[ns]; 111 register int ar = assoc_rule[ns]; 112 113 if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */ 114 } 115 116 else if (type == STATE_TRAILING_CONTEXT) { 117 /* Potential trouble. Scan set of accepting numbers 118 * for the one marking the end of the "head". We 119 * assume that this looping will be fairly cheap 120 * since it's rare that an accepting number set 121 * is large. 122 */ 123 for (j = 1; j <= nacc; ++j) 124 if (accset[j] & YY_TRAILING_HEAD_MASK) { 125 line_warning (_ 126 ("dangerous trailing context"), 127 rule_linenum[ar]); 128 return; 129 } 130 } 131 } 132 } 133 134 135 /* dump_associated_rules - list the rules associated with a DFA state 136 * 137 * Goes through the set of NFA states associated with the DFA and 138 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 139 * and writes a report to the given file. 140 */ 141 142 void dump_associated_rules (file, ds) 143 FILE *file; 144 int ds; 145 { 146 register int i, j; 147 register int num_associated_rules = 0; 148 int rule_set[MAX_ASSOC_RULES + 1]; 149 int *dset = dss[ds]; 150 int size = dfasiz[ds]; 151 152 for (i = 1; i <= size; ++i) { 153 register int rule_num = rule_linenum[assoc_rule[dset[i]]]; 154 155 for (j = 1; j <= num_associated_rules; ++j) 156 if (rule_num == rule_set[j]) 157 break; 158 159 if (j > num_associated_rules) { /* new rule */ 160 if (num_associated_rules < MAX_ASSOC_RULES) 161 rule_set[++num_associated_rules] = 162 rule_num; 163 } 164 } 165 166 qsort (&rule_set [1], num_associated_rules, sizeof (rule_set [1]), intcmp); 167 168 fprintf (file, _(" associated rule line numbers:")); 169 170 for (i = 1; i <= num_associated_rules; ++i) { 171 if (i % 8 == 1) 172 putc ('\n', file); 173 174 fprintf (file, "\t%d", rule_set[i]); 175 } 176 177 putc ('\n', file); 178 } 179 180 181 /* dump_transitions - list the transitions associated with a DFA state 182 * 183 * synopsis 184 * dump_transitions( FILE *file, int state[numecs] ); 185 * 186 * Goes through the set of out-transitions and lists them in human-readable 187 * form (i.e., not as equivalence classes); also lists jam transitions 188 * (i.e., all those which are not out-transitions, plus EOF). The dump 189 * is done to the given file. 190 */ 191 192 void dump_transitions (file, state) 193 FILE *file; 194 int state[]; 195 { 196 register int i, ec; 197 int out_char_set[CSIZE]; 198 199 for (i = 0; i < csize; ++i) { 200 ec = ABS (ecgroup[i]); 201 out_char_set[i] = state[ec]; 202 } 203 204 fprintf (file, _(" out-transitions: ")); 205 206 list_character_set (file, out_char_set); 207 208 /* now invert the members of the set to get the jam transitions */ 209 for (i = 0; i < csize; ++i) 210 out_char_set[i] = !out_char_set[i]; 211 212 fprintf (file, _("\n jam-transitions: EOF ")); 213 214 list_character_set (file, out_char_set); 215 216 putc ('\n', file); 217 } 218 219 220 /* epsclosure - construct the epsilon closure of a set of ndfa states 221 * 222 * synopsis 223 * int *epsclosure( int t[num_states], int *numstates_addr, 224 * int accset[num_rules+1], int *nacc_addr, 225 * int *hashval_addr ); 226 * 227 * NOTES 228 * The epsilon closure is the set of all states reachable by an arbitrary 229 * number of epsilon transitions, which themselves do not have epsilon 230 * transitions going out, unioned with the set of states which have non-null 231 * accepting numbers. t is an array of size numstates of nfa state numbers. 232 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 233 * accset holds a list of the accepting numbers, and the size of accset is 234 * given by *nacc_addr. t may be subjected to reallocation if it is not 235 * large enough to hold the epsilon closure. 236 * 237 * hashval is the hash value for the dfa corresponding to the state set. 238 */ 239 240 int *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr) 241 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 242 { 243 register int stkpos, ns, tsp; 244 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 245 int stkend, nstate; 246 static int did_stk_init = false, *stk; 247 248 #define MARK_STATE(state) \ 249 do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0) 250 251 #define IS_MARKED(state) (trans1[state] < 0) 252 253 #define UNMARK_STATE(state) \ 254 do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0) 255 256 #define CHECK_ACCEPT(state) \ 257 do{ \ 258 nfaccnum = accptnum[state]; \ 259 if ( nfaccnum != NIL ) \ 260 accset[++nacc] = nfaccnum; \ 261 }while(0) 262 263 #define DO_REALLOCATION() \ 264 do { \ 265 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 266 ++num_reallocs; \ 267 t = reallocate_integer_array( t, current_max_dfa_size ); \ 268 stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 269 }while(0) \ 270 271 #define PUT_ON_STACK(state) \ 272 do { \ 273 if ( ++stkend >= current_max_dfa_size ) \ 274 DO_REALLOCATION(); \ 275 stk[stkend] = state; \ 276 MARK_STATE(state); \ 277 }while(0) 278 279 #define ADD_STATE(state) \ 280 do { \ 281 if ( ++numstates >= current_max_dfa_size ) \ 282 DO_REALLOCATION(); \ 283 t[numstates] = state; \ 284 hashval += state; \ 285 }while(0) 286 287 #define STACK_STATE(state) \ 288 do { \ 289 PUT_ON_STACK(state); \ 290 CHECK_ACCEPT(state); \ 291 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 292 ADD_STATE(state); \ 293 }while(0) 294 295 296 if (!did_stk_init) { 297 stk = allocate_integer_array (current_max_dfa_size); 298 did_stk_init = true; 299 } 300 301 nacc = stkend = hashval = 0; 302 303 for (nstate = 1; nstate <= numstates; ++nstate) { 304 ns = t[nstate]; 305 306 /* The state could be marked if we've already pushed it onto 307 * the stack. 308 */ 309 if (!IS_MARKED (ns)) { 310 PUT_ON_STACK (ns); 311 CHECK_ACCEPT (ns); 312 hashval += ns; 313 } 314 } 315 316 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 317 ns = stk[stkpos]; 318 transsym = transchar[ns]; 319 320 if (transsym == SYM_EPSILON) { 321 tsp = trans1[ns] + MARKER_DIFFERENCE; 322 323 if (tsp != NO_TRANSITION) { 324 if (!IS_MARKED (tsp)) 325 STACK_STATE (tsp); 326 327 tsp = trans2[ns]; 328 329 if (tsp != NO_TRANSITION 330 && !IS_MARKED (tsp)) 331 STACK_STATE (tsp); 332 } 333 } 334 } 335 336 /* Clear out "visit" markers. */ 337 338 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 339 if (IS_MARKED (stk[stkpos])) 340 UNMARK_STATE (stk[stkpos]); 341 else 342 flexfatal (_ 343 ("consistency check failed in epsclosure()")); 344 } 345 346 *ns_addr = numstates; 347 *hv_addr = hashval; 348 *nacc_addr = nacc; 349 350 return t; 351 } 352 353 354 /* increase_max_dfas - increase the maximum number of DFAs */ 355 356 void increase_max_dfas () 357 { 358 current_max_dfas += MAX_DFAS_INCREMENT; 359 360 ++num_reallocs; 361 362 base = reallocate_integer_array (base, current_max_dfas); 363 def = reallocate_integer_array (def, current_max_dfas); 364 dfasiz = reallocate_integer_array (dfasiz, current_max_dfas); 365 accsiz = reallocate_integer_array (accsiz, current_max_dfas); 366 dhash = reallocate_integer_array (dhash, current_max_dfas); 367 dss = reallocate_int_ptr_array (dss, current_max_dfas); 368 dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas); 369 370 if (nultrans) 371 nultrans = 372 reallocate_integer_array (nultrans, 373 current_max_dfas); 374 } 375 376 377 /* ntod - convert an ndfa to a dfa 378 * 379 * Creates the dfa corresponding to the ndfa we've constructed. The 380 * dfa starts out in state #1. 381 */ 382 383 void ntod () 384 { 385 int *accset, ds, nacc, newds; 386 int sym, hashval, numstates, dsize; 387 int num_full_table_rows=0; /* used only for -f */ 388 int *nset, *dset; 389 int targptr, totaltrans, i, comstate, comfreq, targ; 390 int symlist[CSIZE + 1]; 391 int num_start_states; 392 int todo_head, todo_next; 393 394 struct yytbl_data *yynxt_tbl = 0; 395 flex_int32_t *yynxt_data = 0, yynxt_curr = 0; 396 397 /* Note that the following are indexed by *equivalence classes* 398 * and not by characters. Since equivalence classes are indexed 399 * beginning with 1, even if the scanner accepts NUL's, this 400 * means that (since every character is potentially in its own 401 * equivalence class) these arrays must have room for indices 402 * from 1 to CSIZE, so their size must be CSIZE + 1. 403 */ 404 int duplist[CSIZE + 1], state[CSIZE + 1]; 405 int targfreq[CSIZE + 1], targstate[CSIZE + 1]; 406 407 /* accset needs to be large enough to hold all of the rules present 408 * in the input, *plus* their YY_TRAILING_HEAD_MASK variants. 409 */ 410 accset = allocate_integer_array ((num_rules + 1) * 2); 411 nset = allocate_integer_array (current_max_dfa_size); 412 413 /* The "todo" queue is represented by the head, which is the DFA 414 * state currently being processed, and the "next", which is the 415 * next DFA state number available (not in use). We depend on the 416 * fact that snstods() returns DFA's \in increasing order/, and thus 417 * need only know the bounds of the dfas to be processed. 418 */ 419 todo_head = todo_next = 0; 420 421 for (i = 0; i <= csize; ++i) { 422 duplist[i] = NIL; 423 symlist[i] = false; 424 } 425 426 for (i = 0; i <= num_rules; ++i) 427 accset[i] = NIL; 428 429 if (trace) { 430 dumpnfa (scset[1]); 431 fputs (_("\n\nDFA Dump:\n\n"), stderr); 432 } 433 434 inittbl (); 435 436 /* Check to see whether we should build a separate table for 437 * transitions on NUL characters. We don't do this for full-speed 438 * (-F) scanners, since for them we don't have a simple state 439 * number lying around with which to index the table. We also 440 * don't bother doing it for scanners unless (1) NUL is in its own 441 * equivalence class (indicated by a positive value of 442 * ecgroup[NUL]), (2) NUL's equivalence class is the last 443 * equivalence class, and (3) the number of equivalence classes is 444 * the same as the number of characters. This latter case comes 445 * about when useecs is false or when it's true but every character 446 * still manages to land in its own class (unlikely, but it's 447 * cheap to check for). If all these things are true then the 448 * character code needed to represent NUL's equivalence class for 449 * indexing the tables is going to take one more bit than the 450 * number of characters, and therefore we won't be assured of 451 * being able to fit it into a YY_CHAR variable. This rules out 452 * storing the transitions in a compressed table, since the code 453 * for interpreting them uses a YY_CHAR variable (perhaps it 454 * should just use an integer, though; this is worth pondering ... 455 * ###). 456 * 457 * Finally, for full tables, we want the number of entries in the 458 * table to be a power of two so the array references go fast (it 459 * will just take a shift to compute the major index). If 460 * encoding NUL's transitions in the table will spoil this, we 461 * give it its own table (note that this will be the case if we're 462 * not using equivalence classes). 463 */ 464 465 /* Note that the test for ecgroup[0] == numecs below accomplishes 466 * both (1) and (2) above 467 */ 468 if (!fullspd && ecgroup[0] == numecs) { 469 /* NUL is alone in its equivalence class, which is the 470 * last one. 471 */ 472 int use_NUL_table = (numecs == csize); 473 474 if (fulltbl && !use_NUL_table) { 475 /* We still may want to use the table if numecs 476 * is a power of 2. 477 */ 478 int power_of_two; 479 480 for (power_of_two = 1; power_of_two <= csize; 481 power_of_two *= 2) 482 if (numecs == power_of_two) { 483 use_NUL_table = true; 484 break; 485 } 486 } 487 488 if (use_NUL_table) 489 nultrans = 490 allocate_integer_array (current_max_dfas); 491 492 /* From now on, nultrans != nil indicates that we're 493 * saving null transitions for later, separate encoding. 494 */ 495 } 496 497 498 if (fullspd) { 499 for (i = 0; i <= numecs; ++i) 500 state[i] = 0; 501 502 place_state (state, 0, 0); 503 dfaacc[0].dfaacc_state = 0; 504 } 505 506 else if (fulltbl) { 507 if (nultrans) 508 /* We won't be including NUL's transitions in the 509 * table, so build it for entries from 0 .. numecs - 1. 510 */ 511 num_full_table_rows = numecs; 512 513 else 514 /* Take into account the fact that we'll be including 515 * the NUL entries in the transition table. Build it 516 * from 0 .. numecs. 517 */ 518 num_full_table_rows = numecs + 1; 519 520 /* Begin generating yy_nxt[][] 521 * This spans the entire LONG function. 522 * This table is tricky because we don't know how big it will be. 523 * So we'll have to realloc() on the way... 524 * we'll wait until we can calculate yynxt_tbl->td_hilen. 525 */ 526 yynxt_tbl = 527 (struct yytbl_data *) calloc (1, 528 sizeof (struct 529 yytbl_data)); 530 yytbl_data_init (yynxt_tbl, YYTD_ID_NXT); 531 yynxt_tbl->td_hilen = 1; 532 yynxt_tbl->td_lolen = num_full_table_rows; 533 yynxt_tbl->td_data = yynxt_data = 534 (flex_int32_t *) calloc (yynxt_tbl->td_lolen * 535 yynxt_tbl->td_hilen, 536 sizeof (flex_int32_t)); 537 yynxt_curr = 0; 538 539 buf_prints (&yydmap_buf, 540 "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n", 541 long_align ? "flex_int32_t" : "flex_int16_t"); 542 543 /* Unless -Ca, declare it "short" because it's a real 544 * long-shot that that won't be large enough. 545 */ 546 if (gentables) 547 out_str_dec 548 ("static yyconst %s yy_nxt[][%d] =\n {\n", 549 long_align ? "flex_int32_t" : "flex_int16_t", 550 num_full_table_rows); 551 else { 552 out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows); 553 out_str ("static yyconst %s *yy_nxt =0;\n", 554 long_align ? "flex_int32_t" : "flex_int16_t"); 555 } 556 557 558 if (gentables) 559 outn (" {"); 560 561 /* Generate 0 entries for state #0. */ 562 for (i = 0; i < num_full_table_rows; ++i) { 563 mk2data (0); 564 yynxt_data[yynxt_curr++] = 0; 565 } 566 567 dataflush (); 568 if (gentables) 569 outn (" },\n"); 570 } 571 572 /* Create the first states. */ 573 574 num_start_states = lastsc * 2; 575 576 for (i = 1; i <= num_start_states; ++i) { 577 numstates = 1; 578 579 /* For each start condition, make one state for the case when 580 * we're at the beginning of the line (the '^' operator) and 581 * one for the case when we're not. 582 */ 583 if (i % 2 == 1) 584 nset[numstates] = scset[(i / 2) + 1]; 585 else 586 nset[numstates] = 587 mkbranch (scbol[i / 2], scset[i / 2]); 588 589 nset = epsclosure (nset, &numstates, accset, &nacc, 590 &hashval); 591 592 if (snstods (nset, numstates, accset, nacc, hashval, &ds)) { 593 numas += nacc; 594 totnst += numstates; 595 ++todo_next; 596 597 if (variable_trailing_context_rules && nacc > 0) 598 check_trailing_context (nset, numstates, 599 accset, nacc); 600 } 601 } 602 603 if (!fullspd) { 604 if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state)) 605 flexfatal (_ 606 ("could not create unique end-of-buffer state")); 607 608 ++numas; 609 ++num_start_states; 610 ++todo_next; 611 } 612 613 614 while (todo_head < todo_next) { 615 targptr = 0; 616 totaltrans = 0; 617 618 for (i = 1; i <= numecs; ++i) 619 state[i] = 0; 620 621 ds = ++todo_head; 622 623 dset = dss[ds]; 624 dsize = dfasiz[ds]; 625 626 if (trace) 627 fprintf (stderr, _("state # %d:\n"), ds); 628 629 sympartition (dset, dsize, symlist, duplist); 630 631 for (sym = 1; sym <= numecs; ++sym) { 632 if (symlist[sym]) { 633 symlist[sym] = 0; 634 635 if (duplist[sym] == NIL) { 636 /* Symbol has unique out-transitions. */ 637 numstates = 638 symfollowset (dset, dsize, 639 sym, nset); 640 nset = epsclosure (nset, 641 &numstates, 642 accset, &nacc, 643 &hashval); 644 645 if (snstods 646 (nset, numstates, accset, nacc, 647 hashval, &newds)) { 648 totnst = totnst + 649 numstates; 650 ++todo_next; 651 numas += nacc; 652 653 if (variable_trailing_context_rules && nacc > 0) 654 check_trailing_context 655 (nset, 656 numstates, 657 accset, 658 nacc); 659 } 660 661 state[sym] = newds; 662 663 if (trace) 664 fprintf (stderr, 665 "\t%d\t%d\n", sym, 666 newds); 667 668 targfreq[++targptr] = 1; 669 targstate[targptr] = newds; 670 ++numuniq; 671 } 672 673 else { 674 /* sym's equivalence class has the same 675 * transitions as duplist(sym)'s 676 * equivalence class. 677 */ 678 targ = state[duplist[sym]]; 679 state[sym] = targ; 680 681 if (trace) 682 fprintf (stderr, 683 "\t%d\t%d\n", sym, 684 targ); 685 686 /* Update frequency count for 687 * destination state. 688 */ 689 690 i = 0; 691 while (targstate[++i] != targ) ; 692 693 ++targfreq[i]; 694 ++numdup; 695 } 696 697 ++totaltrans; 698 duplist[sym] = NIL; 699 } 700 } 701 702 703 numsnpairs += totaltrans; 704 705 if (ds > num_start_states) 706 check_for_backing_up (ds, state); 707 708 if (nultrans) { 709 nultrans[ds] = state[NUL_ec]; 710 state[NUL_ec] = 0; /* remove transition */ 711 } 712 713 if (fulltbl) { 714 715 /* Each time we hit here, it's another td_hilen, so we realloc. */ 716 yynxt_tbl->td_hilen++; 717 yynxt_tbl->td_data = yynxt_data = 718 (flex_int32_t *) realloc (yynxt_data, 719 yynxt_tbl->td_hilen * 720 yynxt_tbl->td_lolen * 721 sizeof (flex_int32_t)); 722 723 724 if (gentables) 725 outn (" {"); 726 727 /* Supply array's 0-element. */ 728 if (ds == end_of_buffer_state) { 729 mk2data (-end_of_buffer_state); 730 yynxt_data[yynxt_curr++] = 731 -end_of_buffer_state; 732 } 733 else { 734 mk2data (end_of_buffer_state); 735 yynxt_data[yynxt_curr++] = 736 end_of_buffer_state; 737 } 738 739 for (i = 1; i < num_full_table_rows; ++i) { 740 /* Jams are marked by negative of state 741 * number. 742 */ 743 mk2data (state[i] ? state[i] : -ds); 744 yynxt_data[yynxt_curr++] = 745 state[i] ? state[i] : -ds; 746 } 747 748 dataflush (); 749 if (gentables) 750 outn (" },\n"); 751 } 752 753 else if (fullspd) 754 place_state (state, ds, totaltrans); 755 756 else if (ds == end_of_buffer_state) 757 /* Special case this state to make sure it does what 758 * it's supposed to, i.e., jam on end-of-buffer. 759 */ 760 stack1 (ds, 0, 0, JAMSTATE); 761 762 else { /* normal, compressed state */ 763 764 /* Determine which destination state is the most 765 * common, and how many transitions to it there are. 766 */ 767 768 comfreq = 0; 769 comstate = 0; 770 771 for (i = 1; i <= targptr; ++i) 772 if (targfreq[i] > comfreq) { 773 comfreq = targfreq[i]; 774 comstate = targstate[i]; 775 } 776 777 bldtbl (state, ds, totaltrans, comstate, comfreq); 778 } 779 } 780 781 if (fulltbl) { 782 dataend (); 783 if (tablesext) { 784 yytbl_data_compress (yynxt_tbl); 785 if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0) 786 flexerror (_ 787 ("Could not write yynxt_tbl[][]")); 788 } 789 if (yynxt_tbl) { 790 yytbl_data_destroy (yynxt_tbl); 791 yynxt_tbl = 0; 792 } 793 } 794 795 else if (!fullspd) { 796 cmptmps (); /* create compressed template entries */ 797 798 /* Create tables for all the states with only one 799 * out-transition. 800 */ 801 while (onesp > 0) { 802 mk1tbl (onestate[onesp], onesym[onesp], 803 onenext[onesp], onedef[onesp]); 804 --onesp; 805 } 806 807 mkdeftbl (); 808 } 809 810 flex_free ((void *) accset); 811 flex_free ((void *) nset); 812 } 813 814 815 /* snstods - converts a set of ndfa states into a dfa state 816 * 817 * synopsis 818 * is_new_state = snstods( int sns[numstates], int numstates, 819 * int accset[num_rules+1], int nacc, 820 * int hashval, int *newds_addr ); 821 * 822 * On return, the dfa state number is in newds. 823 */ 824 825 int snstods (sns, numstates, accset, nacc, hashval, newds_addr) 826 int sns[], numstates, accset[], nacc, hashval, *newds_addr; 827 { 828 int didsort = 0; 829 register int i, j; 830 int newds, *oldsns; 831 832 for (i = 1; i <= lastdfa; ++i) 833 if (hashval == dhash[i]) { 834 if (numstates == dfasiz[i]) { 835 oldsns = dss[i]; 836 837 if (!didsort) { 838 /* We sort the states in sns so we 839 * can compare it to oldsns quickly. 840 */ 841 qsort (&sns [1], numstates, sizeof (sns [1]), intcmp); 842 didsort = 1; 843 } 844 845 for (j = 1; j <= numstates; ++j) 846 if (sns[j] != oldsns[j]) 847 break; 848 849 if (j > numstates) { 850 ++dfaeql; 851 *newds_addr = i; 852 return 0; 853 } 854 855 ++hshcol; 856 } 857 858 else 859 ++hshsave; 860 } 861 862 /* Make a new dfa. */ 863 864 if (++lastdfa >= current_max_dfas) 865 increase_max_dfas (); 866 867 newds = lastdfa; 868 869 dss[newds] = allocate_integer_array (numstates + 1); 870 871 /* If we haven't already sorted the states in sns, we do so now, 872 * so that future comparisons with it can be made quickly. 873 */ 874 875 if (!didsort) 876 qsort (&sns [1], numstates, sizeof (sns [1]), intcmp); 877 878 for (i = 1; i <= numstates; ++i) 879 dss[newds][i] = sns[i]; 880 881 dfasiz[newds] = numstates; 882 dhash[newds] = hashval; 883 884 if (nacc == 0) { 885 if (reject) 886 dfaacc[newds].dfaacc_set = (int *) 0; 887 else 888 dfaacc[newds].dfaacc_state = 0; 889 890 accsiz[newds] = 0; 891 } 892 893 else if (reject) { 894 /* We sort the accepting set in increasing order so the 895 * disambiguating rule that the first rule listed is considered 896 * match in the event of ties will work. 897 */ 898 899 qsort (&accset [1], nacc, sizeof (accset [1]), intcmp); 900 901 dfaacc[newds].dfaacc_set = 902 allocate_integer_array (nacc + 1); 903 904 /* Save the accepting set for later */ 905 for (i = 1; i <= nacc; ++i) { 906 dfaacc[newds].dfaacc_set[i] = accset[i]; 907 908 if (accset[i] <= num_rules) 909 /* Who knows, perhaps a REJECT can yield 910 * this rule. 911 */ 912 rule_useful[accset[i]] = true; 913 } 914 915 accsiz[newds] = nacc; 916 } 917 918 else { 919 /* Find lowest numbered rule so the disambiguating rule 920 * will work. 921 */ 922 j = num_rules + 1; 923 924 for (i = 1; i <= nacc; ++i) 925 if (accset[i] < j) 926 j = accset[i]; 927 928 dfaacc[newds].dfaacc_state = j; 929 930 if (j <= num_rules) 931 rule_useful[j] = true; 932 } 933 934 *newds_addr = newds; 935 936 return 1; 937 } 938 939 940 /* symfollowset - follow the symbol transitions one step 941 * 942 * synopsis 943 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 944 * int transsym, int nset[current_max_dfa_size] ); 945 */ 946 947 int symfollowset (ds, dsize, transsym, nset) 948 int ds[], dsize, transsym, nset[]; 949 { 950 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 951 952 numstates = 0; 953 954 for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */ 955 ns = ds[i]; 956 sym = transchar[ns]; 957 tsp = trans1[ns]; 958 959 if (sym < 0) { /* it's a character class */ 960 sym = -sym; 961 ccllist = cclmap[sym]; 962 lenccl = ccllen[sym]; 963 964 if (cclng[sym]) { 965 for (j = 0; j < lenccl; ++j) { 966 /* Loop through negated character 967 * class. 968 */ 969 ch = ccltbl[ccllist + j]; 970 971 if (ch == 0) 972 ch = NUL_ec; 973 974 if (ch > transsym) 975 /* Transsym isn't in negated 976 * ccl. 977 */ 978 break; 979 980 else if (ch == transsym) 981 /* next 2 */ 982 goto bottom; 983 } 984 985 /* Didn't find transsym in ccl. */ 986 nset[++numstates] = tsp; 987 } 988 989 else 990 for (j = 0; j < lenccl; ++j) { 991 ch = ccltbl[ccllist + j]; 992 993 if (ch == 0) 994 ch = NUL_ec; 995 996 if (ch > transsym) 997 break; 998 else if (ch == transsym) { 999 nset[++numstates] = tsp; 1000 break; 1001 } 1002 } 1003 } 1004 1005 else if (sym == SYM_EPSILON) { /* do nothing */ 1006 } 1007 1008 else if (ABS (ecgroup[sym]) == transsym) 1009 nset[++numstates] = tsp; 1010 1011 bottom:; 1012 } 1013 1014 return numstates; 1015 } 1016 1017 1018 /* sympartition - partition characters with same out-transitions 1019 * 1020 * synopsis 1021 * sympartition( int ds[current_max_dfa_size], int numstates, 1022 * int symlist[numecs], int duplist[numecs] ); 1023 */ 1024 1025 void sympartition (ds, numstates, symlist, duplist) 1026 int ds[], numstates; 1027 int symlist[], duplist[]; 1028 { 1029 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1030 1031 /* Partitioning is done by creating equivalence classes for those 1032 * characters which have out-transitions from the given state. Thus 1033 * we are really creating equivalence classes of equivalence classes. 1034 */ 1035 1036 for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */ 1037 duplist[i] = i - 1; 1038 dupfwd[i] = i + 1; 1039 } 1040 1041 duplist[1] = NIL; 1042 dupfwd[numecs] = NIL; 1043 1044 for (i = 1; i <= numstates; ++i) { 1045 ns = ds[i]; 1046 tch = transchar[ns]; 1047 1048 if (tch != SYM_EPSILON) { 1049 if (tch < -lastccl || tch >= csize) { 1050 flexfatal (_ 1051 ("bad transition character detected in sympartition()")); 1052 } 1053 1054 if (tch >= 0) { /* character transition */ 1055 int ec = ecgroup[tch]; 1056 1057 mkechar (ec, dupfwd, duplist); 1058 symlist[ec] = 1; 1059 } 1060 1061 else { /* character class */ 1062 tch = -tch; 1063 1064 lenccl = ccllen[tch]; 1065 cclp = cclmap[tch]; 1066 mkeccl (ccltbl + cclp, lenccl, dupfwd, 1067 duplist, numecs, NUL_ec); 1068 1069 if (cclng[tch]) { 1070 j = 0; 1071 1072 for (k = 0; k < lenccl; ++k) { 1073 ich = ccltbl[cclp + k]; 1074 1075 if (ich == 0) 1076 ich = NUL_ec; 1077 1078 for (++j; j < ich; ++j) 1079 symlist[j] = 1; 1080 } 1081 1082 for (++j; j <= numecs; ++j) 1083 symlist[j] = 1; 1084 } 1085 1086 else 1087 for (k = 0; k < lenccl; ++k) { 1088 ich = ccltbl[cclp + k]; 1089 1090 if (ich == 0) 1091 ich = NUL_ec; 1092 1093 symlist[ich] = 1; 1094 } 1095 } 1096 } 1097 } 1098 } 1099