1-- Copyright (c) 1990 Regents of the University of California. 2-- All rights reserved. 3-- 4-- This software was developed by John Self of the Arcadia project 5-- at the University of California, Irvine. 6-- 7-- Redistribution and use in source and binary forms are permitted 8-- provided that the above copyright notice and this paragraph are 9-- duplicated in all such forms and that any documentation, 10-- advertising materials, and other materials related to such 11-- distribution and use acknowledge that the software was developed 12-- by the University of California, Irvine. The name of the 13-- University may not be used to endorse or promote products derived 14-- from this software without specific prior written permission. 15-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 16-- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 17-- WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 18 19-- TITLE NFA construction routines 20-- AUTHOR: John Self (UCI) 21-- DESCRIPTION builds the NFA. 22-- NOTES this file mirrors flex as closely as possible. 23-- $Header: /co/ua/self/arcadia/aflex/ada/src/RCS/nfaB.a,v 1.6 90/01/12 15:20:27 self Exp Locker: self $ 24with Ada.Characters.Wide_Wide_Latin_1; 25with Ada.Integer_Wide_Wide_Text_IO; 26with Ada.Wide_Wide_Text_IO; 27 28with MISC_DEFS, NFA, MISC, ECS; 29use MISC_DEFS; 30 31package body NFA is 32 33 use Ada.Integer_Wide_Wide_Text_IO; 34-- use Ada.Text_IO; 35 use Ada.Wide_Wide_Text_IO; 36 37-- add_accept - add an accepting state to a machine 38-- 39-- accepting_number becomes mach's accepting number. 40 41 procedure ADD_ACCEPT(MACH : in out INTEGER; 42 ACCEPTING_NUMBER : in INTEGER) is 43 -- hang the accepting number off an epsilon state. if it is associated 44 -- with a state that has a non-epsilon out-transition, then the state 45 -- will accept BEFORE it makes that transition, i.e., one character 46 -- too soon 47 ASTATE : INTEGER; 48 begin 49 if (TRANSCHAR(FINALST(MACH)) = SYM_EPSILON) then 50 ACCPTNUM(FINALST(MACH)) := ACCEPTING_NUMBER; 51 else 52 ASTATE := MKSTATE(SYM_EPSILON); 53 ACCPTNUM(ASTATE) := ACCEPTING_NUMBER; 54 MACH := LINK_MACHINES(MACH, ASTATE); 55 end if; 56 end ADD_ACCEPT; 57 58 59 -- copysingl - make a given number of copies of a singleton machine 60 -- 61 -- newsng - a new singleton composed of num copies of singl 62 -- singl - a singleton machine 63 -- num - the number of copies of singl to be present in newsng 64 65 function COPYSINGL(SINGL, NUM : in INTEGER) return INTEGER is 66 COPY : INTEGER; 67 begin 68 COPY := MKSTATE(SYM_EPSILON); 69 70 for I in 1 .. NUM loop 71 COPY := LINK_MACHINES(COPY, DUPMACHINE(SINGL)); 72 end loop; 73 74 return COPY; 75 end COPYSINGL; 76 77 78 -- dumpnfa - debugging routine to write out an nfa 79 80 procedure DUMPNFA(STATE1 : in INTEGER) is 81 SYM, TSP1, TSP2, ANUM : INTEGER; 82 begin 83 NEW_LINE(STANDARD_ERROR); 84 NEW_LINE(STANDARD_ERROR); 85 PUT(STANDARD_ERROR, 86 "********** beginning dump of nfa with start state "); 87 PUT(STANDARD_ERROR, STATE1, 0); 88 NEW_LINE(STANDARD_ERROR); 89 90 -- we probably should loop starting at firstst[state1] and going to 91 -- lastst[state1], but they're not maintained properly when we "or" 92 -- all of the rules together. So we use our knowledge that the machine 93 -- starts at state 1 and ends at lastnfa. 94 for NS in 1 .. LASTNFA loop 95 PUT(STANDARD_ERROR, "state # "); 96 PUT(STANDARD_ERROR, NS, 4); 97 PUT(Ada.Characters.Wide_Wide_Latin_1.HT); 98 SYM := TRANSCHAR(NS); 99 TSP1 := TRANS1(NS); 100 TSP2 := TRANS2(NS); 101 ANUM := ACCPTNUM(NS); 102 103 PUT(STANDARD_ERROR, SYM, 5); 104 PUT(STANDARD_ERROR, ": "); 105 PUT(STANDARD_ERROR, TSP1, 4); 106 PUT(STANDARD_ERROR, ","); 107 PUT(STANDARD_ERROR, TSP2, 4); 108 if (ANUM /= NIL) then 109 PUT(STANDARD_ERROR, " ["); 110 PUT(STANDARD_ERROR, ANUM, 0); 111 PUT(STANDARD_ERROR, "]"); 112 end if; 113 NEW_LINE(STANDARD_ERROR); 114 end loop; 115 116 PUT(STANDARD_ERROR, "********** end of dump"); 117 NEW_LINE(STANDARD_ERROR); 118 end DUMPNFA; 119 120 -- dupmachine - make a duplicate of a given machine 121 -- 122 -- copy - holds duplicate of mach 123 -- mach - machine to be duplicated 124 -- 125 -- note that the copy of mach is NOT an exact duplicate; rather, all the 126 -- transition states values are adjusted so that the copy is self-contained, 127 -- as the original should have been. 128 -- 129 -- also note that the original MUST be contiguous, with its low and high 130 -- states accessible by the arrays firstst and lastst 131 132 function DUPMACHINE(MACH : in INTEGER) return INTEGER is 133 INIT, STATE_OFFSET : INTEGER; 134 STATE : INTEGER := 0; 135 LAST : constant INTEGER := LASTST(MACH); 136 I : INTEGER; 137 begin 138 I := FIRSTST(MACH); 139 while (I <= LAST) loop 140 STATE := MKSTATE(TRANSCHAR(I)); 141 142 if (TRANS1(I) /= NO_TRANSITION) then 143 MKXTION(FINALST(STATE), TRANS1(I) + STATE - I); 144 145 if ((TRANSCHAR(I) = SYM_EPSILON) and (TRANS2(I) /= NO_TRANSITION)) then 146 MKXTION(FINALST(STATE), TRANS2(I) + STATE - I); 147 end if; 148 end if; 149 150 ACCPTNUM(STATE) := ACCPTNUM(I); 151 I := I + 1; 152 end loop; 153 154 if (STATE = 0) then 155 Misc.Aflex_Fatal ("empty machine in dupmachine()"); 156 end if; 157 158 STATE_OFFSET := STATE - I + 1; 159 160 INIT := MACH + STATE_OFFSET; 161 FIRSTST(INIT) := FIRSTST(MACH) + STATE_OFFSET; 162 FINALST(INIT) := FINALST(MACH) + STATE_OFFSET; 163 LASTST(INIT) := LASTST(MACH) + STATE_OFFSET; 164 165 return INIT; 166 end DUPMACHINE; 167 168 -- finish_rule - finish up the processing for a rule 169 -- 170 -- An accepting number is added to the given machine. If variable_trail_rule 171 -- is true then the rule has trailing context and both the head and trail 172 -- are variable size. Otherwise if headcnt or trailcnt is non-zero then 173 -- the machine recognizes a pattern with trailing context and headcnt is 174 -- the number of characters in the matched part of the pattern, or zero 175 -- if the matched part has variable length. trailcnt is the number of 176 -- trailing context characters in the pattern, or zero if the trailing 177 -- context has variable length. 178 179 procedure FINISH_RULE(MACH : in INTEGER; 180 VARIABLE_TRAIL_RULE : in BOOLEAN; 181 HEADCNT, TRAILCNT : in INTEGER) is 182 P_MACH : INTEGER; 183 begin 184 P_MACH := MACH; 185 ADD_ACCEPT(P_MACH, NUM_RULES); 186 187 -- we did this in new_rule(), but it often gets the wrong 188 -- number because we do it before we start parsing the current rule 189 RULE_LINENUM(NUM_RULES) := LINENUM; 190 191 PUT(TEMP_ACTION_FILE, " when "); 192 PUT(TEMP_ACTION_FILE, NUM_RULES, 1); 193 PUT_LINE(TEMP_ACTION_FILE, " => "); 194 195 if (VARIABLE_TRAIL_RULE) then 196 RULE_TYPE(NUM_RULES) := RULE_VARIABLE; 197 198 if (PERFORMANCE_REPORT) then 199 PUT(STANDARD_ERROR, "Variable trailing context rule at line "); 200 PUT(STANDARD_ERROR, RULE_LINENUM(NUM_RULES), 1); 201 NEW_LINE(STANDARD_ERROR); 202 end if; 203 204 VARIABLE_TRAILING_CONTEXT_RULES := TRUE; 205 else 206 RULE_TYPE(NUM_RULES) := RULE_NORMAL; 207 208 if ((HEADCNT > 0) or (TRAILCNT > 0)) then 209 210 -- do trailing context magic to not match the trailing characters 211 212 if (HEADCNT > 0) then 213 PUT(TEMP_ACTION_FILE, "yy_cp := yy_bp + "); 214 PUT(TEMP_ACTION_FILE, HEADCNT, 1); 215 PUT_LINE(TEMP_ACTION_FILE, ";"); 216 else 217 PUT(TEMP_ACTION_FILE, "yy_cp := yy_cp - "); 218 PUT(TEMP_ACTION_FILE, TRAILCNT, 1); 219 PUT_LINE(TEMP_ACTION_FILE, ";"); 220 end if; 221 222 PUT_LINE(TEMP_ACTION_FILE, "yy_c_buf_p := yy_cp;"); 223 PUT_LINE(TEMP_ACTION_FILE, 224 "YY_DO_BEFORE_ACTION; -- set up yytext again"); 225 end if; 226 end if; 227 228 MISC.LINE_DIRECTIVE_OUT(TEMP_ACTION_FILE); 229 end FINISH_RULE; 230 231 -- link_machines - connect two machines together 232 -- 233 -- new - a machine constructed by connecting first to last 234 -- first - the machine whose successor is to be last 235 -- last - the machine whose predecessor is to be first 236 -- 237 -- note: this routine concatenates the machine first with the machine 238 -- last to produce a machine new which will pattern-match first first 239 -- and then last, and will fail if either of the sub-patterns fails. 240 -- FIRST is set to new by the operation. last is unmolested. 241 242 function LINK_MACHINES(FIRST, LAST : in INTEGER) return INTEGER is 243 begin 244 if (FIRST = NIL) then 245 return LAST; 246 else 247 if (LAST = NIL) then 248 return FIRST; 249 else 250 MKXTION(FINALST(FIRST), LAST); 251 FINALST(FIRST) := FINALST(LAST); 252 LASTST(FIRST) := Integer'Max (LASTST(FIRST), LASTST(LAST)); 253 FIRSTST(FIRST) := Integer'Min(FIRSTST(FIRST), FIRSTST(LAST)); 254 return (FIRST); 255 end if; 256 end if; 257 end LINK_MACHINES; 258 259 260 -- mark_beginning_as_normal - mark each "beginning" state in a machine 261-- as being a "normal" (i.e., not trailing context- 262 -- associated) states 263 -- 264 -- The "beginning" states are the epsilon closure of the first state 265 266 procedure MARK_BEGINNING_AS_NORMAL(MACH : in INTEGER) is 267 begin 268 case (STATE_TYPE(MACH)) is 269 when STATE_NORMAL => 270 271 -- oh, we've already visited here 272 return; 273 274 when STATE_TRAILING_CONTEXT => 275 STATE_TYPE(MACH) := STATE_NORMAL; 276 277 if (TRANSCHAR(MACH) = SYM_EPSILON) then 278 if (TRANS1(MACH) /= NO_TRANSITION) then 279 MARK_BEGINNING_AS_NORMAL(TRANS1(MACH)); 280 end if; 281 282 if (TRANS2(MACH) /= NO_TRANSITION) then 283 MARK_BEGINNING_AS_NORMAL(TRANS2(MACH)); 284 end if; 285 end if; 286 end case; 287 end MARK_BEGINNING_AS_NORMAL; 288 289 -- mkbranch - make a machine that branches to two machines 290 -- 291 -- branch - a machine which matches either first's pattern or second's 292-- first, second - machines whose patterns are to be or'ed (the | operator) 293 -- 294 -- note that first and second are NEITHER destroyed by the operation. Also, 295 -- the resulting machine CANNOT be used with any other "mk" operation except 296 -- more mkbranch's. Compare with mkor() 297 function MKBRANCH(FIRST, SECOND : in INTEGER) return INTEGER is 298 EPS : INTEGER; 299 begin 300 if (FIRST = NO_TRANSITION) then 301 return SECOND; 302 else 303 if (SECOND = NO_TRANSITION) then 304 return FIRST; 305 end if; 306 end if; 307 308 EPS := MKSTATE(SYM_EPSILON); 309 310 MKXTION(EPS, FIRST); 311 MKXTION(EPS, SECOND); 312 313 return EPS; 314 end MKBRANCH; 315 316 317 -- mkclos - convert a machine into a closure 318 -- 319 -- new - a new state which matches the closure of "state" 320 321 function MKCLOS(STATE : in INTEGER) return INTEGER is 322 begin 323 return NFA.MKOPT(MKPOSCL(STATE)); 324 end MKCLOS; 325 326 327 -- mkopt - make a machine optional 328 -- 329 -- new - a machine which optionally matches whatever mach matched 330 -- mach - the machine to make optional 331 -- 332 -- notes: 333 -- 1. mach must be the last machine created 334 -- 2. mach is destroyed by the call 335 336 function MKOPT(MACH : in INTEGER) return INTEGER is 337 EPS : INTEGER; 338 RESULT : INTEGER; 339 begin 340 RESULT := MACH; 341 if (not SUPER_FREE_EPSILON(FINALST(RESULT))) then 342 EPS := NFA.MKSTATE(SYM_EPSILON); 343 RESULT := NFA.LINK_MACHINES(RESULT, EPS); 344 end if; 345 346 -- can't skimp on the following if FREE_EPSILON(mach) is true because 347 -- some state interior to "mach" might point back to the beginning 348 -- for a closure 349 EPS := NFA.MKSTATE(SYM_EPSILON); 350 RESULT := NFA.LINK_MACHINES(EPS, RESULT); 351 352 NFA.MKXTION(RESULT, FINALST(RESULT)); 353 354 return RESULT; 355 end MKOPT; 356 357 358 -- mkor - make a machine that matches either one of two machines 359 -- 360 -- new - a machine which matches either first's pattern or second's 361-- first, second - machines whose patterns are to be or'ed (the | operator) 362 -- 363 -- note that first and second are both destroyed by the operation 364 -- the code is rather convoluted because an attempt is made to minimize 365 -- the number of epsilon states needed 366 367 function MKOR(FIRST, SECOND : in INTEGER) return INTEGER is 368 EPS, OREND : INTEGER; 369 P_FIRST : INTEGER; 370 begin 371 P_FIRST := FIRST; 372 if (P_FIRST = NIL) then 373 return SECOND; 374 else 375 if (SECOND = NIL) then 376 return P_FIRST; 377 else 378 379 -- see comment in mkopt() about why we can't use the first state 380 -- of "first" or "second" if they satisfy "FREE_EPSILON" 381 EPS := MKSTATE(SYM_EPSILON); 382 383 P_FIRST := LINK_MACHINES(EPS, P_FIRST); 384 385 MKXTION(P_FIRST, SECOND); 386 387 if ((SUPER_FREE_EPSILON(FINALST(P_FIRST))) and (ACCPTNUM(FINALST(P_FIRST 388 )) = NIL)) then 389 OREND := FINALST(P_FIRST); 390 MKXTION(FINALST(SECOND), OREND); 391 else 392 if ((SUPER_FREE_EPSILON(FINALST(SECOND))) and (ACCPTNUM(FINALST(SECOND 393 )) = NIL)) then 394 OREND := FINALST(SECOND); 395 MKXTION(FINALST(P_FIRST), OREND); 396 else 397 EPS := MKSTATE(SYM_EPSILON); 398 P_FIRST := LINK_MACHINES(P_FIRST, EPS); 399 OREND := FINALST(P_FIRST); 400 401 MKXTION(FINALST(SECOND), OREND); 402 end if; 403 end if; 404 end if; 405 end if; 406 407 FINALST(P_FIRST) := OREND; 408 return P_FIRST; 409 end MKOR; 410 411 412 -- mkposcl - convert a machine into a positive closure 413 -- 414 -- new - a machine matching the positive closure of "state" 415 416 function MKPOSCL(STATE : in INTEGER) return INTEGER is 417 EPS : INTEGER; 418 begin 419 if (SUPER_FREE_EPSILON(FINALST(STATE))) then 420 MKXTION(FINALST(STATE), STATE); 421 return (STATE); 422 else 423 EPS := MKSTATE(SYM_EPSILON); 424 MKXTION(EPS, STATE); 425 return (LINK_MACHINES(STATE, EPS)); 426 end if; 427 end MKPOSCL; 428 429 -- mkrep - make a replicated machine 430 -- 431 -- new - a machine that matches whatever "mach" matched from "lb" 432 -- number of times to "ub" number of times 433 -- 434 -- note 435-- if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach" 436 437 function MKREP(MACH, LB, UB : in INTEGER) return INTEGER is 438 BASE_MACH, TAIL, COPY : INTEGER; 439 P_MACH : INTEGER; 440 begin 441 P_MACH := MACH; 442 BASE_MACH := COPYSINGL(P_MACH, LB - 1); 443 444 if (UB = INFINITY) then 445 COPY := DUPMACHINE(P_MACH); 446 P_MACH := LINK_MACHINES(P_MACH, LINK_MACHINES(BASE_MACH, MKCLOS(COPY))); 447 else 448 TAIL := MKSTATE(SYM_EPSILON); 449 450 for I in LB .. UB - 1 loop 451 COPY := DUPMACHINE(P_MACH); 452 TAIL := MKOPT(LINK_MACHINES(COPY, TAIL)); 453 end loop; 454 455 P_MACH := LINK_MACHINES(P_MACH, LINK_MACHINES(BASE_MACH, TAIL)); 456 end if; 457 458 return P_MACH; 459 end MKREP; 460 461 -- mkstate - create a state with a transition on a given symbol 462 -- 463 -- state - a new state matching sym 464 -- sym - the symbol the new state is to have an out-transition on 465 -- 466 -- note that this routine makes new states in ascending order through the 467 -- state array (and increments LASTNFA accordingly). The routine DUPMACHINE 468 -- relies on machines being made in ascending order and that they are 469 -- CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge 470 -- that it admittedly is) 471 472 function MKSTATE(SYM : in INTEGER) return INTEGER is 473 begin 474 LASTNFA := LASTNFA + 1; 475 if (LASTNFA >= CURRENT_MNS) then 476 CURRENT_MNS := CURRENT_MNS + MNS_INCREMENT; 477 if (CURRENT_MNS >= MAXIMUM_MNS) then 478 Misc.Aflex_Error 479 ("input rules are too complicated (>= " 480 & INTEGER'Wide_Wide_Image (CURRENT_MNS) & " NFA states) )"); 481 end if; 482 483 NUM_REALLOCS := NUM_REALLOCS + 1; 484 485 REALLOCATE_INTEGER_ARRAY(FIRSTST, CURRENT_MNS); 486 REALLOCATE_INTEGER_ARRAY(LASTST, CURRENT_MNS); 487 REALLOCATE_INTEGER_ARRAY(FINALST, CURRENT_MNS); 488 REALLOCATE_INTEGER_ARRAY(TRANSCHAR, CURRENT_MNS); 489 REALLOCATE_INTEGER_ARRAY(TRANS1, CURRENT_MNS); 490 REALLOCATE_INTEGER_ARRAY(TRANS2, CURRENT_MNS); 491 REALLOCATE_INTEGER_ARRAY(ACCPTNUM, CURRENT_MNS); 492 REALLOCATE_INTEGER_ARRAY(ASSOC_RULE, CURRENT_MNS); 493 REALLOCATE_STATE_ENUM_ARRAY(STATE_TYPE, CURRENT_MNS); 494 end if; 495 496 FIRSTST(LASTNFA) := LASTNFA; 497 FINALST(LASTNFA) := LASTNFA; 498 LASTST(LASTNFA) := LASTNFA; 499 TRANSCHAR(LASTNFA) := SYM; 500 TRANS1(LASTNFA) := NO_TRANSITION; 501 TRANS2(LASTNFA) := NO_TRANSITION; 502 ACCPTNUM(LASTNFA) := NIL; 503 ASSOC_RULE(LASTNFA) := NUM_RULES; 504 STATE_TYPE(LASTNFA) := CURRENT_STATE_ENUM; 505 506 -- fix up equivalence classes base on this transition. Note that any 507 -- character which has its own transition gets its own equivalence class. 508 -- Thus only characters which are only in character classes have a chance 509 -- at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b' 510 -- into two different equivalence classes. "[ab]" puts them in the same 511 -- equivalence class (barring other differences elsewhere in the input). 512 if (SYM < 0) then 513 514 -- we don't have to update the equivalence classes since that was 515 -- already done when the ccl was created for the first time 516 null; 517 else 518 if (SYM = SYM_EPSILON) then 519 NUMEPS := NUMEPS + 1; 520 else 521 if (USEECS) then 522 ECS.MKECHAR(SYM, NEXTECM, ECGROUP); 523 end if; 524 end if; 525 end if; 526 527 return LASTNFA; 528 end MKSTATE; 529 530 -- mkxtion - make a transition from one state to another 531 -- 532 -- statefrom - the state from which the transition is to be made 533 -- stateto - the state to which the transition is to be made 534 535 procedure MKXTION(STATEFROM, STATETO : in INTEGER) is 536 begin 537 if (TRANS1(STATEFROM) = NO_TRANSITION) then 538 TRANS1(STATEFROM) := STATETO; 539 else 540 if ((TRANSCHAR(STATEFROM) /= SYM_EPSILON) or (TRANS2(STATEFROM) /= 541 NO_TRANSITION)) then 542 Misc.Aflex_Fatal ("found too many transitions in mkxtion()"); 543 else 544 545 -- second out-transition for an epsilon state 546 EPS2 := EPS2 + 1; 547 TRANS2(STATEFROM) := STATETO; 548 end if; 549 end if; 550 end MKXTION; 551 552 -- new_rule - initialize for a new rule 553 -- 554 -- the global num_rules is incremented and the any corresponding dynamic 555 -- arrays (such as rule_type()) are grown as needed. 556 557 procedure NEW_RULE is 558 begin 559 NUM_RULES := NUM_RULES + 1; 560 if (NUM_RULES >= CURRENT_MAX_RULES) then 561 NUM_REALLOCS := NUM_REALLOCS + 1; 562 CURRENT_MAX_RULES := CURRENT_MAX_RULES + MAX_RULES_INCREMENT; 563 REALLOCATE_RULE_ENUM_ARRAY(RULE_TYPE, CURRENT_MAX_RULES); 564 REALLOCATE_INTEGER_ARRAY(RULE_LINENUM, CURRENT_MAX_RULES); 565 end if; 566 567 if (NUM_RULES > MAX_RULE) then 568 Misc.Aflex_Error 569 ("too many rules (> " 570 & INTEGER'Wide_Wide_Image (MAX_RULE) & ")!"); 571 end if; 572 573 RULE_LINENUM(NUM_RULES) := LINENUM; 574 end NEW_RULE; 575 576end NFA; 577