1 /* $OpenBSD: dfa.c,v 1.5 2001/11/19 19:02:14 mpech Exp $ */ 2 3 /* dfa - DFA construction routines */ 4 5 /*- 6 * Copyright (c) 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Vern Paxson. 11 * 12 * The United States Government has rights in this work pursuant 13 * to contract no. DE-AC03-76SF00098 between the United States 14 * Department of Energy and the University of California. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that: (1) source distributions 18 * retain this entire copyright notice and comment, and (2) distributions 19 * including binaries display the following acknowledgement: ``This product 20 * includes software developed by the University of California, Berkeley 21 * and its contributors'' in the documentation or other materials provided 22 * with the distribution and in all advertising materials mentioning 23 * features or use of this software. Neither the name of the University nor 24 * the names of its contributors may be used to endorse or promote products 25 * derived from this software without specific prior written permission. 26 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 27 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 28 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 29 */ 30 31 /* $Header: /home/cvs/src/usr.bin/lex/dfa.c,v 1.5 2001/11/19 19:02:14 mpech Exp $ */ 32 33 #include "flexdef.h" 34 35 36 /* declare functions that have forward references */ 37 38 void dump_associated_rules PROTO((FILE*, int)); 39 void dump_transitions PROTO((FILE*, int[])); 40 void sympartition PROTO((int[], int, int[], int[])); 41 int symfollowset PROTO((int[], int, int, int[])); 42 43 44 /* check_for_backing_up - check a DFA state for backing up 45 * 46 * synopsis 47 * void check_for_backing_up( int ds, int state[numecs] ); 48 * 49 * ds is the number of the state to check and state[] is its out-transitions, 50 * indexed by equivalence class. 51 */ 52 53 void check_for_backing_up( ds, state ) 54 int ds; 55 int state[]; 56 { 57 if ( (reject && ! dfaacc[ds].dfaacc_set) || 58 (! reject && ! dfaacc[ds].dfaacc_state) ) 59 { /* state is non-accepting */ 60 ++num_backing_up; 61 62 if ( backing_up_report ) 63 { 64 fprintf( backing_up_file, 65 _( "State #%d is non-accepting -\n" ), ds ); 66 67 /* identify the state */ 68 dump_associated_rules( backing_up_file, ds ); 69 70 /* Now identify it further using the out- and 71 * jam-transitions. 72 */ 73 dump_transitions( backing_up_file, state ); 74 75 putc( '\n', backing_up_file ); 76 } 77 } 78 } 79 80 81 /* check_trailing_context - check to see if NFA state set constitutes 82 * "dangerous" trailing context 83 * 84 * synopsis 85 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 86 * int accset[nacc+1], int nacc ); 87 * 88 * NOTES 89 * Trailing context is "dangerous" if both the head and the trailing 90 * part are of variable size \and/ there's a DFA state which contains 91 * both an accepting state for the head part of the rule and NFA states 92 * which occur after the beginning of the trailing context. 93 * 94 * When such a rule is matched, it's impossible to tell if having been 95 * in the DFA state indicates the beginning of the trailing context or 96 * further-along scanning of the pattern. In these cases, a warning 97 * message is issued. 98 * 99 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 100 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 101 */ 102 103 void check_trailing_context( nfa_states, num_states, accset, nacc ) 104 int *nfa_states, num_states; 105 int *accset; 106 int nacc; 107 { 108 int i, j; 109 110 for ( i = 1; i <= num_states; ++i ) 111 { 112 int ns = nfa_states[i]; 113 int type = state_type[ns]; 114 int ar = assoc_rule[ns]; 115 116 if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) 117 { /* do nothing */ 118 } 119 120 else if ( type == STATE_TRAILING_CONTEXT ) 121 { 122 /* Potential trouble. Scan set of accepting numbers 123 * for the one marking the end of the "head". We 124 * assume that this looping will be fairly cheap 125 * since it's rare that an accepting number set 126 * is large. 127 */ 128 for ( j = 1; j <= nacc; ++j ) 129 if ( accset[j] & YY_TRAILING_HEAD_MASK ) 130 { 131 line_warning( 132 _( "dangerous trailing context" ), 133 rule_linenum[ar] ); 134 return; 135 } 136 } 137 } 138 } 139 140 141 /* dump_associated_rules - list the rules associated with a DFA state 142 * 143 * Goes through the set of NFA states associated with the DFA and 144 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 145 * and writes a report to the given file. 146 */ 147 148 void dump_associated_rules( file, ds ) 149 FILE *file; 150 int ds; 151 { 152 int i, j; 153 int num_associated_rules = 0; 154 int rule_set[MAX_ASSOC_RULES + 1]; 155 int *dset = dss[ds]; 156 int size = dfasiz[ds]; 157 158 for ( i = 1; i <= size; ++i ) 159 { 160 int rule_num = rule_linenum[assoc_rule[dset[i]]]; 161 162 for ( j = 1; j <= num_associated_rules; ++j ) 163 if ( rule_num == rule_set[j] ) 164 break; 165 166 if ( j > num_associated_rules ) 167 { /* new rule */ 168 if ( num_associated_rules < MAX_ASSOC_RULES ) 169 rule_set[++num_associated_rules] = rule_num; 170 } 171 } 172 173 bubble( rule_set, num_associated_rules ); 174 175 fprintf( file, _( " associated rule line numbers:" ) ); 176 177 for ( i = 1; i <= num_associated_rules; ++i ) 178 { 179 if ( i % 8 == 1 ) 180 putc( '\n', file ); 181 182 fprintf( file, "\t%d", rule_set[i] ); 183 } 184 185 putc( '\n', file ); 186 } 187 188 189 /* dump_transitions - list the transitions associated with a DFA state 190 * 191 * synopsis 192 * dump_transitions( FILE *file, int state[numecs] ); 193 * 194 * Goes through the set of out-transitions and lists them in human-readable 195 * form (i.e., not as equivalence classes); also lists jam transitions 196 * (i.e., all those which are not out-transitions, plus EOF). The dump 197 * is done to the given file. 198 */ 199 200 void dump_transitions( file, state ) 201 FILE *file; 202 int state[]; 203 { 204 int i, ec; 205 int out_char_set[CSIZE]; 206 207 for ( i = 0; i < csize; ++i ) 208 { 209 ec = ABS( ecgroup[i] ); 210 out_char_set[i] = state[ec]; 211 } 212 213 fprintf( file, _( " out-transitions: " ) ); 214 215 list_character_set( file, out_char_set ); 216 217 /* now invert the members of the set to get the jam transitions */ 218 for ( i = 0; i < csize; ++i ) 219 out_char_set[i] = ! out_char_set[i]; 220 221 fprintf( file, _( "\n jam-transitions: EOF " ) ); 222 223 list_character_set( file, out_char_set ); 224 225 putc( '\n', file ); 226 } 227 228 229 /* epsclosure - construct the epsilon closure of a set of ndfa states 230 * 231 * synopsis 232 * int *epsclosure( int t[num_states], int *numstates_addr, 233 * int accset[num_rules+1], int *nacc_addr, 234 * int *hashval_addr ); 235 * 236 * NOTES 237 * The epsilon closure is the set of all states reachable by an arbitrary 238 * number of epsilon transitions, which themselves do not have epsilon 239 * transitions going out, unioned with the set of states which have non-null 240 * accepting numbers. t is an array of size numstates of nfa state numbers. 241 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 242 * accset holds a list of the accepting numbers, and the size of accset is 243 * given by *nacc_addr. t may be subjected to reallocation if it is not 244 * large enough to hold the epsilon closure. 245 * 246 * hashval is the hash value for the dfa corresponding to the state set. 247 */ 248 249 int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) 250 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 251 { 252 int stkpos, ns, tsp; 253 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 254 int stkend, nstate; 255 static int did_stk_init = false, *stk; 256 257 #define MARK_STATE(state) \ 258 trans1[state] = trans1[state] - MARKER_DIFFERENCE; 259 260 #define IS_MARKED(state) (trans1[state] < 0) 261 262 #define UNMARK_STATE(state) \ 263 trans1[state] = trans1[state] + MARKER_DIFFERENCE; 264 265 #define CHECK_ACCEPT(state) \ 266 { \ 267 nfaccnum = accptnum[state]; \ 268 if ( nfaccnum != NIL ) \ 269 accset[++nacc] = nfaccnum; \ 270 } 271 272 #define DO_REALLOCATION \ 273 { \ 274 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 275 ++num_reallocs; \ 276 t = reallocate_integer_array( t, current_max_dfa_size ); \ 277 stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 278 } \ 279 280 #define PUT_ON_STACK(state) \ 281 { \ 282 if ( ++stkend >= current_max_dfa_size ) \ 283 DO_REALLOCATION \ 284 stk[stkend] = state; \ 285 MARK_STATE(state) \ 286 } 287 288 #define ADD_STATE(state) \ 289 { \ 290 if ( ++numstates >= current_max_dfa_size ) \ 291 DO_REALLOCATION \ 292 t[numstates] = state; \ 293 hashval += state; \ 294 } 295 296 #define STACK_STATE(state) \ 297 { \ 298 PUT_ON_STACK(state) \ 299 CHECK_ACCEPT(state) \ 300 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 301 ADD_STATE(state) \ 302 } 303 304 305 if ( ! did_stk_init ) 306 { 307 stk = allocate_integer_array( current_max_dfa_size ); 308 did_stk_init = true; 309 } 310 311 nacc = stkend = hashval = 0; 312 313 for ( nstate = 1; nstate <= numstates; ++nstate ) 314 { 315 ns = t[nstate]; 316 317 /* The state could be marked if we've already pushed it onto 318 * the stack. 319 */ 320 if ( ! IS_MARKED(ns) ) 321 { 322 PUT_ON_STACK(ns) 323 CHECK_ACCEPT(ns) 324 hashval += ns; 325 } 326 } 327 328 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 329 { 330 ns = stk[stkpos]; 331 transsym = transchar[ns]; 332 333 if ( transsym == SYM_EPSILON ) 334 { 335 tsp = trans1[ns] + MARKER_DIFFERENCE; 336 337 if ( tsp != NO_TRANSITION ) 338 { 339 if ( ! IS_MARKED(tsp) ) 340 STACK_STATE(tsp) 341 342 tsp = trans2[ns]; 343 344 if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) ) 345 STACK_STATE(tsp) 346 } 347 } 348 } 349 350 /* Clear out "visit" markers. */ 351 352 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 353 { 354 if ( IS_MARKED(stk[stkpos]) ) 355 UNMARK_STATE(stk[stkpos]) 356 else 357 flexfatal( 358 _( "consistency check failed in epsclosure()" ) ); 359 } 360 361 *ns_addr = numstates; 362 *hv_addr = hashval; 363 *nacc_addr = nacc; 364 365 return t; 366 } 367 368 369 /* increase_max_dfas - increase the maximum number of DFAs */ 370 371 void increase_max_dfas() 372 { 373 current_max_dfas += MAX_DFAS_INCREMENT; 374 375 ++num_reallocs; 376 377 base = reallocate_integer_array( base, current_max_dfas ); 378 def = reallocate_integer_array( def, current_max_dfas ); 379 dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); 380 accsiz = reallocate_integer_array( accsiz, current_max_dfas ); 381 dhash = reallocate_integer_array( dhash, current_max_dfas ); 382 dss = reallocate_int_ptr_array( dss, current_max_dfas ); 383 dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); 384 385 if ( nultrans ) 386 nultrans = 387 reallocate_integer_array( nultrans, current_max_dfas ); 388 } 389 390 391 /* ntod - convert an ndfa to a dfa 392 * 393 * Creates the dfa corresponding to the ndfa we've constructed. The 394 * dfa starts out in state #1. 395 */ 396 397 void ntod() 398 { 399 int *accset, ds, nacc, newds; 400 int sym, hashval, numstates, dsize; 401 int num_full_table_rows; /* used only for -f */ 402 int *nset, *dset; 403 int targptr, totaltrans, i, comstate, comfreq, targ; 404 int symlist[CSIZE + 1]; 405 int num_start_states; 406 int todo_head, todo_next; 407 408 /* Note that the following are indexed by *equivalence classes* 409 * and not by characters. Since equivalence classes are indexed 410 * beginning with 1, even if the scanner accepts NUL's, this 411 * means that (since every character is potentially in its own 412 * equivalence class) these arrays must have room for indices 413 * from 1 to CSIZE, so their size must be CSIZE + 1. 414 */ 415 int duplist[CSIZE + 1], state[CSIZE + 1]; 416 int targfreq[CSIZE + 1], targstate[CSIZE + 1]; 417 418 accset = allocate_integer_array( num_rules + 1 ); 419 nset = allocate_integer_array( current_max_dfa_size ); 420 421 /* The "todo" queue is represented by the head, which is the DFA 422 * state currently being processed, and the "next", which is the 423 * next DFA state number available (not in use). We depend on the 424 * fact that snstods() returns DFA's \in increasing order/, and thus 425 * need only know the bounds of the dfas to be processed. 426 */ 427 todo_head = todo_next = 0; 428 429 for ( i = 0; i <= csize; ++i ) 430 { 431 duplist[i] = NIL; 432 symlist[i] = false; 433 } 434 435 for ( i = 0; i <= num_rules; ++i ) 436 accset[i] = NIL; 437 438 if ( trace ) 439 { 440 dumpnfa( scset[1] ); 441 fputs( _( "\n\nDFA Dump:\n\n" ), stderr ); 442 } 443 444 inittbl(); 445 446 /* Check to see whether we should build a separate table for 447 * transitions on NUL characters. We don't do this for full-speed 448 * (-F) scanners, since for them we don't have a simple state 449 * number lying around with which to index the table. We also 450 * don't bother doing it for scanners unless (1) NUL is in its own 451 * equivalence class (indicated by a positive value of 452 * ecgroup[NUL]), (2) NUL's equivalence class is the last 453 * equivalence class, and (3) the number of equivalence classes is 454 * the same as the number of characters. This latter case comes 455 * about when useecs is false or when it's true but every character 456 * still manages to land in its own class (unlikely, but it's 457 * cheap to check for). If all these things are true then the 458 * character code needed to represent NUL's equivalence class for 459 * indexing the tables is going to take one more bit than the 460 * number of characters, and therefore we won't be assured of 461 * being able to fit it into a YY_CHAR variable. This rules out 462 * storing the transitions in a compressed table, since the code 463 * for interpreting them uses a YY_CHAR variable (perhaps it 464 * should just use an integer, though; this is worth pondering ... 465 * ###). 466 * 467 * Finally, for full tables, we want the number of entries in the 468 * table to be a power of two so the array references go fast (it 469 * will just take a shift to compute the major index). If 470 * encoding NUL's transitions in the table will spoil this, we 471 * give it its own table (note that this will be the case if we're 472 * not using equivalence classes). 473 */ 474 475 /* Note that the test for ecgroup[0] == numecs below accomplishes 476 * both (1) and (2) above 477 */ 478 if ( ! fullspd && ecgroup[0] == numecs ) 479 { 480 /* NUL is alone in its equivalence class, which is the 481 * last one. 482 */ 483 int use_NUL_table = (numecs == csize); 484 485 if ( fulltbl && ! use_NUL_table ) 486 { 487 /* We still may want to use the table if numecs 488 * is a power of 2. 489 */ 490 int power_of_two; 491 492 for ( power_of_two = 1; power_of_two <= csize; 493 power_of_two *= 2 ) 494 if ( numecs == power_of_two ) 495 { 496 use_NUL_table = true; 497 break; 498 } 499 } 500 501 if ( use_NUL_table ) 502 nultrans = allocate_integer_array( current_max_dfas ); 503 504 /* From now on, nultrans != nil indicates that we're 505 * saving null transitions for later, separate encoding. 506 */ 507 } 508 509 510 if ( fullspd ) 511 { 512 for ( i = 0; i <= numecs; ++i ) 513 state[i] = 0; 514 515 place_state( state, 0, 0 ); 516 dfaacc[0].dfaacc_state = 0; 517 } 518 519 else if ( fulltbl ) 520 { 521 if ( nultrans ) 522 /* We won't be including NUL's transitions in the 523 * table, so build it for entries from 0 .. numecs - 1. 524 */ 525 num_full_table_rows = numecs; 526 527 else 528 /* Take into account the fact that we'll be including 529 * the NUL entries in the transition table. Build it 530 * from 0 .. numecs. 531 */ 532 num_full_table_rows = numecs + 1; 533 534 /* Unless -Ca, declare it "short" because it's a real 535 * long-shot that that won't be large enough. 536 */ 537 out_str_dec( "static yyconst %s yy_nxt[][%d] =\n {\n", 538 /* '}' so vi doesn't get too confused */ 539 long_align ? "long" : "short", num_full_table_rows ); 540 541 outn( " {" ); 542 543 /* Generate 0 entries for state #0. */ 544 for ( i = 0; i < num_full_table_rows; ++i ) 545 mk2data( 0 ); 546 547 dataflush(); 548 outn( " },\n" ); 549 } 550 551 /* Create the first states. */ 552 553 num_start_states = lastsc * 2; 554 555 for ( i = 1; i <= num_start_states; ++i ) 556 { 557 numstates = 1; 558 559 /* For each start condition, make one state for the case when 560 * we're at the beginning of the line (the '^' operator) and 561 * one for the case when we're not. 562 */ 563 if ( i % 2 == 1 ) 564 nset[numstates] = scset[(i / 2) + 1]; 565 else 566 nset[numstates] = 567 mkbranch( scbol[i / 2], scset[i / 2] ); 568 569 nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); 570 571 if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) 572 { 573 numas += nacc; 574 totnst += numstates; 575 ++todo_next; 576 577 if ( variable_trailing_context_rules && nacc > 0 ) 578 check_trailing_context( nset, numstates, 579 accset, nacc ); 580 } 581 } 582 583 if ( ! fullspd ) 584 { 585 if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) 586 flexfatal( 587 _( "could not create unique end-of-buffer state" ) ); 588 589 ++numas; 590 ++num_start_states; 591 ++todo_next; 592 } 593 594 while ( todo_head < todo_next ) 595 { 596 targptr = 0; 597 totaltrans = 0; 598 599 for ( i = 1; i <= numecs; ++i ) 600 state[i] = 0; 601 602 ds = ++todo_head; 603 604 dset = dss[ds]; 605 dsize = dfasiz[ds]; 606 607 if ( trace ) 608 fprintf( stderr, _( "state # %d:\n" ), ds ); 609 610 sympartition( dset, dsize, symlist, duplist ); 611 612 for ( sym = 1; sym <= numecs; ++sym ) 613 { 614 if ( symlist[sym] ) 615 { 616 symlist[sym] = 0; 617 618 if ( duplist[sym] == NIL ) 619 { 620 /* Symbol has unique out-transitions. */ 621 numstates = symfollowset( dset, dsize, 622 sym, nset ); 623 nset = epsclosure( nset, &numstates, 624 accset, &nacc, &hashval ); 625 626 if ( snstods( nset, numstates, accset, 627 nacc, hashval, &newds ) ) 628 { 629 totnst = totnst + numstates; 630 ++todo_next; 631 numas += nacc; 632 633 if ( 634 variable_trailing_context_rules && 635 nacc > 0 ) 636 check_trailing_context( 637 nset, numstates, 638 accset, nacc ); 639 } 640 641 state[sym] = newds; 642 643 if ( trace ) 644 fprintf( stderr, "\t%d\t%d\n", 645 sym, newds ); 646 647 targfreq[++targptr] = 1; 648 targstate[targptr] = newds; 649 ++numuniq; 650 } 651 652 else 653 { 654 /* sym's equivalence class has the same 655 * transitions as duplist(sym)'s 656 * equivalence class. 657 */ 658 targ = state[duplist[sym]]; 659 state[sym] = targ; 660 661 if ( trace ) 662 fprintf( stderr, "\t%d\t%d\n", 663 sym, targ ); 664 665 /* Update frequency count for 666 * destination state. 667 */ 668 669 i = 0; 670 while ( targstate[++i] != targ ) 671 ; 672 673 ++targfreq[i]; 674 ++numdup; 675 } 676 677 ++totaltrans; 678 duplist[sym] = NIL; 679 } 680 } 681 682 if ( caseins && ! useecs ) 683 { 684 int j; 685 686 for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) 687 { 688 if ( state[i] == 0 && state[j] != 0 ) 689 /* We're adding a transition. */ 690 ++totaltrans; 691 692 else if ( state[i] != 0 && state[j] == 0 ) 693 /* We're taking away a transition. */ 694 --totaltrans; 695 696 state[i] = state[j]; 697 } 698 } 699 700 numsnpairs += totaltrans; 701 702 if ( ds > num_start_states ) 703 check_for_backing_up( ds, state ); 704 705 if ( nultrans ) 706 { 707 nultrans[ds] = state[NUL_ec]; 708 state[NUL_ec] = 0; /* remove transition */ 709 } 710 711 if ( fulltbl ) 712 { 713 outn( " {" ); 714 715 /* Supply array's 0-element. */ 716 if ( ds == end_of_buffer_state ) 717 mk2data( -end_of_buffer_state ); 718 else 719 mk2data( end_of_buffer_state ); 720 721 for ( i = 1; i < num_full_table_rows; ++i ) 722 /* Jams are marked by negative of state 723 * number. 724 */ 725 mk2data( state[i] ? state[i] : -ds ); 726 727 dataflush(); 728 outn( " },\n" ); 729 } 730 731 else if ( fullspd ) 732 place_state( state, ds, totaltrans ); 733 734 else if ( ds == end_of_buffer_state ) 735 /* Special case this state to make sure it does what 736 * it's supposed to, i.e., jam on end-of-buffer. 737 */ 738 stack1( ds, 0, 0, JAMSTATE ); 739 740 else /* normal, compressed state */ 741 { 742 /* Determine which destination state is the most 743 * common, and how many transitions to it there are. 744 */ 745 746 comfreq = 0; 747 comstate = 0; 748 749 for ( i = 1; i <= targptr; ++i ) 750 if ( targfreq[i] > comfreq ) 751 { 752 comfreq = targfreq[i]; 753 comstate = targstate[i]; 754 } 755 756 bldtbl( state, ds, totaltrans, comstate, comfreq ); 757 } 758 } 759 760 if ( fulltbl ) 761 dataend(); 762 763 else if ( ! fullspd ) 764 { 765 cmptmps(); /* create compressed template entries */ 766 767 /* Create tables for all the states with only one 768 * out-transition. 769 */ 770 while ( onesp > 0 ) 771 { 772 mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], 773 onedef[onesp] ); 774 --onesp; 775 } 776 777 mkdeftbl(); 778 } 779 780 flex_free( (void *) accset ); 781 flex_free( (void *) nset ); 782 } 783 784 785 /* snstods - converts a set of ndfa states into a dfa state 786 * 787 * synopsis 788 * is_new_state = snstods( int sns[numstates], int numstates, 789 * int accset[num_rules+1], int nacc, 790 * int hashval, int *newds_addr ); 791 * 792 * On return, the dfa state number is in newds. 793 */ 794 795 int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) 796 int sns[], numstates, accset[], nacc, hashval, *newds_addr; 797 { 798 int didsort = 0; 799 int i, j; 800 int newds, *oldsns; 801 802 for ( i = 1; i <= lastdfa; ++i ) 803 if ( hashval == dhash[i] ) 804 { 805 if ( numstates == dfasiz[i] ) 806 { 807 oldsns = dss[i]; 808 809 if ( ! didsort ) 810 { 811 /* We sort the states in sns so we 812 * can compare it to oldsns quickly. 813 * We use bubble because there probably 814 * aren't very many states. 815 */ 816 bubble( sns, numstates ); 817 didsort = 1; 818 } 819 820 for ( j = 1; j <= numstates; ++j ) 821 if ( sns[j] != oldsns[j] ) 822 break; 823 824 if ( j > numstates ) 825 { 826 ++dfaeql; 827 *newds_addr = i; 828 return 0; 829 } 830 831 ++hshcol; 832 } 833 834 else 835 ++hshsave; 836 } 837 838 /* Make a new dfa. */ 839 840 if ( ++lastdfa >= current_max_dfas ) 841 increase_max_dfas(); 842 843 newds = lastdfa; 844 845 dss[newds] = allocate_integer_array( numstates + 1 ); 846 847 /* If we haven't already sorted the states in sns, we do so now, 848 * so that future comparisons with it can be made quickly. 849 */ 850 851 if ( ! didsort ) 852 bubble( sns, numstates ); 853 854 for ( i = 1; i <= numstates; ++i ) 855 dss[newds][i] = sns[i]; 856 857 dfasiz[newds] = numstates; 858 dhash[newds] = hashval; 859 860 if ( nacc == 0 ) 861 { 862 if ( reject ) 863 dfaacc[newds].dfaacc_set = (int *) 0; 864 else 865 dfaacc[newds].dfaacc_state = 0; 866 867 accsiz[newds] = 0; 868 } 869 870 else if ( reject ) 871 { 872 /* We sort the accepting set in increasing order so the 873 * disambiguating rule that the first rule listed is considered 874 * match in the event of ties will work. We use a bubble 875 * sort since the list is probably quite small. 876 */ 877 878 bubble( accset, nacc ); 879 880 dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); 881 882 /* Save the accepting set for later */ 883 for ( i = 1; i <= nacc; ++i ) 884 { 885 dfaacc[newds].dfaacc_set[i] = accset[i]; 886 887 if ( accset[i] <= num_rules ) 888 /* Who knows, perhaps a REJECT can yield 889 * this rule. 890 */ 891 rule_useful[accset[i]] = true; 892 } 893 894 accsiz[newds] = nacc; 895 } 896 897 else 898 { 899 /* Find lowest numbered rule so the disambiguating rule 900 * will work. 901 */ 902 j = num_rules + 1; 903 904 for ( i = 1; i <= nacc; ++i ) 905 if ( accset[i] < j ) 906 j = accset[i]; 907 908 dfaacc[newds].dfaacc_state = j; 909 910 if ( j <= num_rules ) 911 rule_useful[j] = true; 912 } 913 914 *newds_addr = newds; 915 916 return 1; 917 } 918 919 920 /* symfollowset - follow the symbol transitions one step 921 * 922 * synopsis 923 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 924 * int transsym, int nset[current_max_dfa_size] ); 925 */ 926 927 int symfollowset( ds, dsize, transsym, nset ) 928 int ds[], dsize, transsym, nset[]; 929 { 930 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 931 932 numstates = 0; 933 934 for ( i = 1; i <= dsize; ++i ) 935 { /* for each nfa state ns in the state set of ds */ 936 ns = ds[i]; 937 sym = transchar[ns]; 938 tsp = trans1[ns]; 939 940 if ( sym < 0 ) 941 { /* it's a character class */ 942 sym = -sym; 943 ccllist = cclmap[sym]; 944 lenccl = ccllen[sym]; 945 946 if ( cclng[sym] ) 947 { 948 for ( j = 0; j < lenccl; ++j ) 949 { 950 /* Loop through negated character 951 * class. 952 */ 953 ch = ccltbl[ccllist + j]; 954 955 if ( ch == 0 ) 956 ch = NUL_ec; 957 958 if ( ch > transsym ) 959 /* Transsym isn't in negated 960 * ccl. 961 */ 962 break; 963 964 else if ( ch == transsym ) 965 /* next 2 */ goto bottom; 966 } 967 968 /* Didn't find transsym in ccl. */ 969 nset[++numstates] = tsp; 970 } 971 972 else 973 for ( j = 0; j < lenccl; ++j ) 974 { 975 ch = ccltbl[ccllist + j]; 976 977 if ( ch == 0 ) 978 ch = NUL_ec; 979 980 if ( ch > transsym ) 981 break; 982 else if ( ch == transsym ) 983 { 984 nset[++numstates] = tsp; 985 break; 986 } 987 } 988 } 989 990 else if ( sym >= 'A' && sym <= 'Z' && caseins ) 991 flexfatal( 992 _( "consistency check failed in symfollowset" ) ); 993 994 else if ( sym == SYM_EPSILON ) 995 { /* do nothing */ 996 } 997 998 else if ( ABS( ecgroup[sym] ) == transsym ) 999 nset[++numstates] = tsp; 1000 1001 bottom: ; 1002 } 1003 1004 return numstates; 1005 } 1006 1007 1008 /* sympartition - partition characters with same out-transitions 1009 * 1010 * synopsis 1011 * sympartition( int ds[current_max_dfa_size], int numstates, 1012 * int symlist[numecs], int duplist[numecs] ); 1013 */ 1014 1015 void sympartition( ds, numstates, symlist, duplist ) 1016 int ds[], numstates; 1017 int symlist[], duplist[]; 1018 { 1019 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1020 1021 /* Partitioning is done by creating equivalence classes for those 1022 * characters which have out-transitions from the given state. Thus 1023 * we are really creating equivalence classes of equivalence classes. 1024 */ 1025 1026 for ( i = 1; i <= numecs; ++i ) 1027 { /* initialize equivalence class list */ 1028 duplist[i] = i - 1; 1029 dupfwd[i] = i + 1; 1030 } 1031 1032 duplist[1] = NIL; 1033 dupfwd[numecs] = NIL; 1034 1035 for ( i = 1; i <= numstates; ++i ) 1036 { 1037 ns = ds[i]; 1038 tch = transchar[ns]; 1039 1040 if ( tch != SYM_EPSILON ) 1041 { 1042 if ( tch < -lastccl || tch >= csize ) 1043 { 1044 flexfatal( 1045 _( "bad transition character detected in sympartition()" ) ); 1046 } 1047 1048 if ( tch >= 0 ) 1049 { /* character transition */ 1050 int ec = ecgroup[tch]; 1051 1052 mkechar( ec, dupfwd, duplist ); 1053 symlist[ec] = 1; 1054 } 1055 1056 else 1057 { /* character class */ 1058 tch = -tch; 1059 1060 lenccl = ccllen[tch]; 1061 cclp = cclmap[tch]; 1062 mkeccl( ccltbl + cclp, lenccl, dupfwd, 1063 duplist, numecs, NUL_ec ); 1064 1065 if ( cclng[tch] ) 1066 { 1067 j = 0; 1068 1069 for ( k = 0; k < lenccl; ++k ) 1070 { 1071 ich = ccltbl[cclp + k]; 1072 1073 if ( ich == 0 ) 1074 ich = NUL_ec; 1075 1076 for ( ++j; j < ich; ++j ) 1077 symlist[j] = 1; 1078 } 1079 1080 for ( ++j; j <= numecs; ++j ) 1081 symlist[j] = 1; 1082 } 1083 1084 else 1085 for ( k = 0; k < lenccl; ++k ) 1086 { 1087 ich = ccltbl[cclp + k]; 1088 1089 if ( ich == 0 ) 1090 ich = NUL_ec; 1091 1092 symlist[ich] = 1; 1093 } 1094 } 1095 } 1096 } 1097 } 1098