1 /*
2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 */
4
5 /* This file is part of Ragel.
6 *
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22 #include <iostream>
23 #include <iomanip>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdlib.h>
27
28 /* Parsing. */
29 #include "ragel.h"
30 #include "rlparse.h"
31 #include "parsetree.h"
32
33 using namespace std;
34 ostream &operator<<( ostream &out, const NameRef &nameRef );
35 ostream &operator<<( ostream &out, const NameInst &nameInst );
36
37 /* Convert the literal string which comes in from the scanner into an array of
38 * characters with escapes and options interpreted. Also null terminates the
39 * string. Though this null termination should not be relied on for
40 * interpreting literals in the parser because the string may contain \0 */
prepareLitString(const InputLoc & loc,const char * data,long length,long & resLen,bool & caseInsensitive)41 char *prepareLitString( const InputLoc &loc, const char *data, long length,
42 long &resLen, bool &caseInsensitive )
43 {
44 char *resData = new char[length+1];
45 caseInsensitive = false;
46
47 const char *src = data + 1;
48 const char *end = data + length - 1;
49
50 while ( *end != '\'' && *end != '\"' ) {
51 if ( *end == 'i' )
52 caseInsensitive = true;
53 else {
54 error( loc ) << "literal string '" << *end <<
55 "' option not supported" << endl;
56 }
57 end -= 1;
58 }
59
60 char *dest = resData;
61 long len = 0;
62 while ( src != end ) {
63 if ( *src == '\\' ) {
64 switch ( src[1] ) {
65 case '0': dest[len++] = '\0'; break;
66 case 'a': dest[len++] = '\a'; break;
67 case 'b': dest[len++] = '\b'; break;
68 case 't': dest[len++] = '\t'; break;
69 case 'n': dest[len++] = '\n'; break;
70 case 'v': dest[len++] = '\v'; break;
71 case 'f': dest[len++] = '\f'; break;
72 case 'r': dest[len++] = '\r'; break;
73 case '\n': break;
74 default: dest[len++] = src[1]; break;
75 }
76 src += 2;
77 }
78 else {
79 dest[len++] = *src++;
80 }
81 }
82
83 resLen = len;
84 resData[resLen] = 0;
85 return resData;
86 }
87
walk(ParseData * pd)88 FsmAp *VarDef::walk( ParseData *pd )
89 {
90 /* We enter into a new name scope. */
91 NameFrame nameFrame = pd->enterNameScope( true, 1 );
92
93 /* Recurse on the expression. */
94 FsmAp *rtnVal = machineDef->walk( pd );
95
96 /* Do the tranfer of local error actions. */
97 LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
98 if ( localErrDictEl != 0 ) {
99 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
100 rtnVal->transferErrorActions( state, localErrDictEl->value );
101 }
102
103 /* If the expression below is a join operation with multiple expressions
104 * then it just had epsilon transisions resolved. If it is a join
105 * with only a single expression then run the epsilon op now. */
106 if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
107 rtnVal->epsilonOp();
108
109 /* We can now unset entry points that are not longer used. */
110 pd->unsetObsoleteEntries( rtnVal );
111
112 /* If the name of the variable is referenced then add the entry point to
113 * the graph. */
114 if ( pd->curNameInst->numRefs > 0 )
115 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
116
117 /* Pop the name scope. */
118 pd->popNameScope( nameFrame );
119 return rtnVal;
120 }
121
makeNameTree(const InputLoc & loc,ParseData * pd)122 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
123 {
124 /* The variable definition enters a new scope. */
125 NameInst *prevNameInst = pd->curNameInst;
126 pd->curNameInst = pd->addNameInst( loc, name, false );
127
128 if ( machineDef->type == MachineDef::LongestMatchType )
129 pd->curNameInst->isLongestMatch = true;
130
131 /* Recurse. */
132 machineDef->makeNameTree( pd );
133
134 /* The name scope ends, pop the name instantiation. */
135 pd->curNameInst = prevNameInst;
136 }
137
resolveNameRefs(ParseData * pd)138 void VarDef::resolveNameRefs( ParseData *pd )
139 {
140 /* Entering into a new scope. */
141 NameFrame nameFrame = pd->enterNameScope( true, 1 );
142
143 /* Recurse. */
144 machineDef->resolveNameRefs( pd );
145
146 /* The name scope ends, pop the name instantiation. */
147 pd->popNameScope( nameFrame );
148 }
149
getLoc()150 InputLoc LongestMatchPart::getLoc()
151 {
152 return action != 0 ? action->loc : semiLoc;
153 }
154
155 /*
156 * If there are any LMs then all of the following entry points must reset
157 * tokstart:
158 *
159 * 1. fentry(StateRef)
160 * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
161 * 3. targt of any transition that has an fcall (the return loc).
162 * 4. start state of all longest match routines.
163 */
164
newAction(ParseData * pd,const InputLoc & loc,const char * name,InlineList * inlineList)165 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
166 const char *name, InlineList *inlineList )
167 {
168 Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
169 action->actionRefs.append( pd->curNameInst );
170 pd->actionList.append( action );
171 action->isLmAction = true;
172 return action;
173 }
174
makeActions(ParseData * pd)175 void LongestMatch::makeActions( ParseData *pd )
176 {
177 /* Make actions that set the action id. */
178 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
179 /* For each part create actions for setting the match type. We need
180 * to do this so that the actions will go into the actionIndex. */
181 InlineList *inlineList = new InlineList;
182 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
183 InlineItem::LmSetActId ) );
184 char *actName = new char[50];
185 sprintf( actName, "store%i", lmi->longestMatchId );
186 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
187 }
188
189 /* Make actions that execute the user action and restart on the last
190 * character. */
191 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
192 /* For each part create actions for setting the match type. We need
193 * to do this so that the actions will go into the actionIndex. */
194 InlineList *inlineList = new InlineList;
195 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
196 InlineItem::LmOnLast ) );
197 char *actName = new char[50];
198 sprintf( actName, "last%i", lmi->longestMatchId );
199 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
200 }
201
202 /* Make actions that execute the user action and restart on the next
203 * character. These actions will set tokend themselves (it is the current
204 * char). */
205 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
206 /* For each part create actions for setting the match type. We need
207 * to do this so that the actions will go into the actionIndex. */
208 InlineList *inlineList = new InlineList;
209 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
210 InlineItem::LmOnNext ) );
211 char *actName = new char[50];
212 sprintf( actName, "next%i", lmi->longestMatchId );
213 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
214 }
215
216 /* Make actions that execute the user action and restart at tokend. These
217 * actions execute some time after matching the last char. */
218 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
219 /* For each part create actions for setting the match type. We need
220 * to do this so that the actions will go into the actionIndex. */
221 InlineList *inlineList = new InlineList;
222 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
223 InlineItem::LmOnLagBehind ) );
224 char *actName = new char[50];
225 sprintf( actName, "lag%i", lmi->longestMatchId );
226 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
227 }
228
229 InputLoc loc;
230 loc.line = 1;
231 loc.col = 1;
232 loc.fileName = "NONE";
233
234 /* Create the error action. */
235 InlineList *il6 = new InlineList;
236 il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
237 lmActSelect = newAction( pd, loc, "switch", il6 );
238 }
239
findName(ParseData * pd)240 void LongestMatch::findName( ParseData *pd )
241 {
242 NameInst *nameInst = pd->curNameInst;
243 while ( nameInst->name == 0 ) {
244 nameInst = nameInst->parent;
245 /* Since every machine must must have a name, we should always find a
246 * name for the longest match. */
247 assert( nameInst != 0 );
248 }
249 name = nameInst->name;
250 }
251
makeNameTree(ParseData * pd)252 void LongestMatch::makeNameTree( ParseData *pd )
253 {
254 /* Create an anonymous scope for the longest match. Will be used for
255 * restarting machine after matching a token. */
256 NameInst *prevNameInst = pd->curNameInst;
257 pd->curNameInst = pd->addNameInst( loc, 0, false );
258
259 /* Recurse into all parts of the longest match operator. */
260 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
261 lmi->join->makeNameTree( pd );
262
263 /* Traverse the name tree upwards to find a name for this lm. */
264 findName( pd );
265
266 /* Also make the longest match's actions at this point. */
267 makeActions( pd );
268
269 /* The name scope ends, pop the name instantiation. */
270 pd->curNameInst = prevNameInst;
271 }
272
resolveNameRefs(ParseData * pd)273 void LongestMatch::resolveNameRefs( ParseData *pd )
274 {
275 /* The longest match gets its own name scope. */
276 NameFrame nameFrame = pd->enterNameScope( true, 1 );
277
278 /* Take an action reference for each longest match item and recurse. */
279 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
280 /* Record the reference if the item has an action. */
281 if ( lmi->action != 0 )
282 lmi->action->actionRefs.append( pd->localNameScope );
283
284 /* Recurse down the join. */
285 lmi->join->resolveNameRefs( pd );
286 }
287
288 /* The name scope ends, pop the name instantiation. */
289 pd->popNameScope( nameFrame );
290 }
291
restart(FsmAp * graph,TransAp * trans)292 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
293 {
294 StateAp *fromState = trans->fromState;
295 graph->detachTrans( fromState, trans->toState, trans );
296 graph->attachTrans( fromState, graph->startState, trans );
297 }
298
runLongestMatch(ParseData * pd,FsmAp * graph)299 void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
300 {
301 graph->markReachableFromHereStopFinal( graph->startState );
302 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
303 if ( ms->stateBits & STB_ISMARKED ) {
304 ms->lmItemSet.insert( 0 );
305 ms->stateBits &= ~ STB_ISMARKED;
306 }
307 }
308
309 /* Transfer the first item of non-empty lmAction tables to the item sets
310 * of the states that follow. Exclude states that have no transitions out.
311 * This must happen on a separate pass so that on each iteration of the
312 * next pass we have the item set entries from all lmAction tables. */
313 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
314 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
315 if ( trans->lmActionTable.length() > 0 ) {
316 LmActionTableEl *lmAct = trans->lmActionTable.data;
317 StateAp *toState = trans->toState;
318 assert( toState );
319
320 /* Can only optimize this if there are no transitions out.
321 * Note there can be out transitions going nowhere with
322 * actions and they too must inhibit this optimization. */
323 if ( toState->outList.length() > 0 ) {
324 /* Fill the item sets. */
325 graph->markReachableFromHereStopFinal( toState );
326 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
327 if ( ms->stateBits & STB_ISMARKED ) {
328 ms->lmItemSet.insert( lmAct->value );
329 ms->stateBits &= ~ STB_ISMARKED;
330 }
331 }
332 }
333 }
334 }
335 }
336
337 /* The lmItem sets are now filled, telling us which longest match rules
338 * can succeed in which states. First determine if we need to make sure
339 * act is defaulted to zero. We need to do this if there are any states
340 * with lmItemSet.length() > 1 and NULL is included. That is, that the
341 * switch may get called when in fact nothing has been matched. */
342 int maxItemSetLength = 0;
343 graph->markReachableFromHereStopFinal( graph->startState );
344 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
345 if ( ms->stateBits & STB_ISMARKED ) {
346 if ( ms->lmItemSet.length() > maxItemSetLength )
347 maxItemSetLength = ms->lmItemSet.length();
348 ms->stateBits &= ~ STB_ISMARKED;
349 }
350 }
351
352 /* The actions executed on starting to match a token. */
353 graph->isolateStartState();
354 graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
355 graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
356 if ( maxItemSetLength > 1 ) {
357 /* The longest match action switch may be called when tokens are
358 * matched, in which case act must be initialized, there must be a
359 * case to handle the error, and the generated machine will require an
360 * error state. */
361 lmSwitchHandlesError = true;
362 pd->lmRequiresErrorState = true;
363 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
364 }
365
366 /* The place to store transitions to restart. It maybe possible for the
367 * restarting to affect the searching through the graph that follows. For
368 * now take the safe route and save the list of transitions to restart
369 * until after all searching is done. */
370 Vector<TransAp*> restartTrans;
371
372 /* Set actions that do immediate token recognition, set the longest match part
373 * id and set the token ending. */
374 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
375 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
376 if ( trans->lmActionTable.length() > 0 ) {
377 LmActionTableEl *lmAct = trans->lmActionTable.data;
378 StateAp *toState = trans->toState;
379 assert( toState );
380
381 /* Can only optimize this if there are no transitions out.
382 * Note there can be out transitions going nowhere with
383 * actions and they too must inhibit this optimization. */
384 if ( toState->outList.length() == 0 ) {
385 /* Can execute the immediate action for the longest match
386 * part. Redirect the action to the start state.
387 *
388 * NOTE: When we need to inhibit on_last due to leaving
389 * actions the above test suffices. If the state has out
390 * actions then it will fail because the out action will
391 * have been transferred to an error transition, which
392 * makes the outlist non-empty. */
393 trans->actionTable.setAction( lmAct->key,
394 lmAct->value->actOnLast );
395 restartTrans.append( trans );
396 }
397 else {
398 /* Look for non final states that have a non-empty item
399 * set. If these are present then we need to record the
400 * end of the token. Also Find the highest item set
401 * length reachable from here (excluding at transtions to
402 * final states). */
403 bool nonFinalNonEmptyItemSet = false;
404 maxItemSetLength = 0;
405 graph->markReachableFromHereStopFinal( toState );
406 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
407 if ( ms->stateBits & STB_ISMARKED ) {
408 if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
409 nonFinalNonEmptyItemSet = true;
410 if ( ms->lmItemSet.length() > maxItemSetLength )
411 maxItemSetLength = ms->lmItemSet.length();
412 ms->stateBits &= ~ STB_ISMARKED;
413 }
414 }
415
416 /* If there are reachable states that are not final and
417 * have non empty item sets or that have an item set
418 * length greater than one then we need to set tokend
419 * because the error action that matches the token will
420 * require it. */
421 if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
422 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
423
424 /* Some states may not know which longest match item to
425 * execute, must set it. */
426 if ( maxItemSetLength > 1 ) {
427 /* There are transitions out, another match may come. */
428 trans->actionTable.setAction( lmAct->key,
429 lmAct->value->setActId );
430 }
431 }
432 }
433 }
434 }
435
436 /* Now that all graph searching is done it certainly safe set the
437 * restarting. It may be safe above, however this must be verified. */
438 for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
439 restart( graph, *pt );
440
441 int lmErrActionOrd = pd->curActionOrd++;
442
443 /* Embed the error for recognizing a char. */
444 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
445 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
446 if ( st->isFinState() ) {
447 /* On error execute the onActNext action, which knows that
448 * the last character of the token was one back and restart. */
449 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
450 &st->lmItemSet[0]->actOnNext, 1 );
451 st->eofActionTable.setAction( lmErrActionOrd,
452 st->lmItemSet[0]->actOnNext );
453 st->eofTarget = graph->startState;
454 }
455 else {
456 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
457 &st->lmItemSet[0]->actLagBehind, 1 );
458 st->eofActionTable.setAction( lmErrActionOrd,
459 st->lmItemSet[0]->actLagBehind );
460 st->eofTarget = graph->startState;
461 }
462 }
463 else if ( st->lmItemSet.length() > 1 ) {
464 /* Need to use the select. Take note of which items the select
465 * is needed for so only the necessary actions are included. */
466 for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
467 if ( *plmi != 0 )
468 (*plmi)->inLmSelect = true;
469 }
470 /* On error, execute the action select and go to the start state. */
471 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
472 &lmActSelect, 1 );
473 st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
474 st->eofTarget = graph->startState;
475 }
476 }
477
478 /* Finally, the start state should be made final. */
479 graph->setFinState( graph->startState );
480 }
481
transferScannerLeavingActions(FsmAp * graph)482 void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
483 {
484 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
485 if ( st->outActionTable.length() > 0 )
486 graph->setErrorActions( st, st->outActionTable );
487 }
488 }
489
walk(ParseData * pd)490 FsmAp *LongestMatch::walk( ParseData *pd )
491 {
492 /* The longest match has it's own name scope. */
493 NameFrame nameFrame = pd->enterNameScope( true, 1 );
494
495 /* Make each part of the longest match. */
496 FsmAp **parts = new FsmAp*[longestMatchList->length()];
497 LmPartList::Iter lmi = *longestMatchList;
498 for ( int i = 0; lmi.lte(); lmi++, i++ ) {
499 /* Create the machine and embed the setting of the longest match id. */
500 parts[i] = lmi->join->walk( pd );
501 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
502 }
503
504 /* Before we union the patterns we need to deal with leaving actions. They
505 * are transfered to error transitions out of the final states (like local
506 * error actions) and to eof actions. In the scanner we need to forbid
507 * on_last for any final state that has an leaving action. */
508 for ( int i = 0; i < longestMatchList->length(); i++ )
509 transferScannerLeavingActions( parts[i] );
510
511 /* Union machines one and up with machine zero. The grammar dictates that
512 * there will always be at least one part. */
513 FsmAp *rtnVal = parts[0];
514 for ( int i = 1; i < longestMatchList->length(); i++ ) {
515 rtnVal->unionOp( parts[i] );
516 afterOpMinimize( rtnVal );
517 }
518
519 runLongestMatch( pd, rtnVal );
520
521 /* Pop the name scope. */
522 pd->popNameScope( nameFrame );
523
524 delete[] parts;
525 return rtnVal;
526 }
527
walk(ParseData * pd)528 FsmAp *MachineDef::walk( ParseData *pd )
529 {
530 FsmAp *rtnVal = 0;
531 switch ( type ) {
532 case JoinType:
533 rtnVal = join->walk( pd );
534 break;
535 case LongestMatchType:
536 rtnVal = longestMatch->walk( pd );
537 break;
538 case LengthDefType:
539 condData->lastCondKey.increment();
540 rtnVal = new FsmAp();
541 rtnVal->concatFsm( condData->lastCondKey );
542 break;
543 }
544 return rtnVal;
545 }
546
makeNameTree(ParseData * pd)547 void MachineDef::makeNameTree( ParseData *pd )
548 {
549 switch ( type ) {
550 case JoinType:
551 join->makeNameTree( pd );
552 break;
553 case LongestMatchType:
554 longestMatch->makeNameTree( pd );
555 break;
556 case LengthDefType:
557 break;
558 }
559 }
560
resolveNameRefs(ParseData * pd)561 void MachineDef::resolveNameRefs( ParseData *pd )
562 {
563 switch ( type ) {
564 case JoinType:
565 join->resolveNameRefs( pd );
566 break;
567 case LongestMatchType:
568 longestMatch->resolveNameRefs( pd );
569 break;
570 case LengthDefType:
571 break;
572 }
573 }
574
575
576 /* Construct with a location and the first expression. */
Join(const InputLoc & loc,Expression * expr)577 Join::Join( const InputLoc &loc, Expression *expr )
578 :
579 loc(loc)
580 {
581 exprList.append( expr );
582 }
583
584 /* Construct with a location and the first expression. */
Join(Expression * expr)585 Join::Join( Expression *expr )
586 {
587 exprList.append( expr );
588 }
589
590 /* Walk an expression node. */
walk(ParseData * pd)591 FsmAp *Join::walk( ParseData *pd )
592 {
593 if ( exprList.length() > 1 )
594 return walkJoin( pd );
595 else
596 return exprList.head->walk( pd );
597 }
598
599 /* There is a list of expressions to join. */
walkJoin(ParseData * pd)600 FsmAp *Join::walkJoin( ParseData *pd )
601 {
602 /* We enter into a new name scope. */
603 NameFrame nameFrame = pd->enterNameScope( true, 1 );
604
605 /* Evaluate the machines. */
606 FsmAp **fsms = new FsmAp*[exprList.length()];
607 ExprList::Iter expr = exprList;
608 for ( int e = 0; e < exprList.length(); e++, expr++ )
609 fsms[e] = expr->walk( pd );
610
611 /* Get the start and final names. Final is
612 * guaranteed to exist, start is not. */
613 NameInst *startName = pd->curNameInst->start;
614 NameInst *finalName = pd->curNameInst->final;
615
616 int startId = -1;
617 if ( startName != 0 ) {
618 /* Take note that there was an implicit link to the start machine. */
619 pd->localNameScope->referencedNames.append( startName );
620 startId = startName->id;
621 }
622
623 /* A final id of -1 indicates there is no epsilon that references the
624 * final state, therefor do not create one or set an entry point to it. */
625 int finalId = -1;
626 if ( finalName->numRefs > 0 )
627 finalId = finalName->id;
628
629 /* Join machines 1 and up onto machine 0. */
630 FsmAp *retFsm = fsms[0];
631 retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
632
633 /* We can now unset entry points that are not longer used. */
634 pd->unsetObsoleteEntries( retFsm );
635
636 /* Pop the name scope. */
637 pd->popNameScope( nameFrame );
638
639 delete[] fsms;
640 return retFsm;
641 }
642
makeNameTree(ParseData * pd)643 void Join::makeNameTree( ParseData *pd )
644 {
645 if ( exprList.length() > 1 ) {
646 /* Create the new anonymous scope. */
647 NameInst *prevNameInst = pd->curNameInst;
648 pd->curNameInst = pd->addNameInst( loc, 0, false );
649
650 /* Join scopes need an implicit "final" target. */
651 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
652 pd->nextNameId++, false );
653
654 /* Recurse into all expressions in the list. */
655 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
656 expr->makeNameTree( pd );
657
658 /* The name scope ends, pop the name instantiation. */
659 pd->curNameInst = prevNameInst;
660 }
661 else {
662 /* Recurse into the single expression. */
663 exprList.head->makeNameTree( pd );
664 }
665 }
666
667
resolveNameRefs(ParseData * pd)668 void Join::resolveNameRefs( ParseData *pd )
669 {
670 /* Branch on whether or not there is to be a join. */
671 if ( exprList.length() > 1 ) {
672 /* The variable definition enters a new scope. */
673 NameFrame nameFrame = pd->enterNameScope( true, 1 );
674
675 /* The join scope must contain a start label. */
676 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
677 if ( resolved.length() > 0 ) {
678 /* Take the first. */
679 pd->curNameInst->start = resolved[0];
680 if ( resolved.length() > 1 ) {
681 /* Complain about the multiple references. */
682 error(loc) << "join operation has multiple start labels" << endl;
683 errorStateLabels( resolved );
684 }
685 }
686
687 /* Make sure there is a start label. */
688 if ( pd->curNameInst->start != 0 ) {
689 /* There is an implicit reference to start name. */
690 pd->curNameInst->start->numRefs += 1;
691 }
692 else {
693 /* No start label. */
694 error(loc) << "join operation has no start label" << endl;
695 }
696
697 /* Recurse into all expressions in the list. */
698 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
699 expr->resolveNameRefs( pd );
700
701 /* The name scope ends, pop the name instantiation. */
702 pd->popNameScope( nameFrame );
703 }
704 else {
705 /* Recurse into the single expression. */
706 exprList.head->resolveNameRefs( pd );
707 }
708 }
709
710 /* Clean up after an expression node. */
~Expression()711 Expression::~Expression()
712 {
713 switch ( type ) {
714 case OrType: case IntersectType: case SubtractType:
715 case StrongSubtractType:
716 delete expression;
717 delete term;
718 break;
719 case TermType:
720 delete term;
721 break;
722 case BuiltinType:
723 break;
724 }
725 }
726
727 /* Evaluate a single expression node. */
walk(ParseData * pd,bool lastInSeq)728 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
729 {
730 FsmAp *rtnVal = 0;
731 switch ( type ) {
732 case OrType: {
733 /* Evaluate the expression. */
734 rtnVal = expression->walk( pd, false );
735 /* Evaluate the term. */
736 FsmAp *rhs = term->walk( pd );
737 /* Perform union. */
738 rtnVal->unionOp( rhs );
739 afterOpMinimize( rtnVal, lastInSeq );
740 break;
741 }
742 case IntersectType: {
743 /* Evaluate the expression. */
744 rtnVal = expression->walk( pd );
745 /* Evaluate the term. */
746 FsmAp *rhs = term->walk( pd );
747 /* Perform intersection. */
748 rtnVal->intersectOp( rhs );
749 afterOpMinimize( rtnVal, lastInSeq );
750 break;
751 }
752 case SubtractType: {
753 /* Evaluate the expression. */
754 rtnVal = expression->walk( pd );
755 /* Evaluate the term. */
756 FsmAp *rhs = term->walk( pd );
757 /* Perform subtraction. */
758 rtnVal->subtractOp( rhs );
759 afterOpMinimize( rtnVal, lastInSeq );
760 break;
761 }
762 case StrongSubtractType: {
763 /* Evaluate the expression. */
764 rtnVal = expression->walk( pd );
765
766 /* Evaluate the term and pad it with any* machines. */
767 FsmAp *rhs = dotStarFsm( pd );
768 FsmAp *termFsm = term->walk( pd );
769 FsmAp *trailAnyStar = dotStarFsm( pd );
770 rhs->concatOp( termFsm );
771 rhs->concatOp( trailAnyStar );
772
773 /* Perform subtraction. */
774 rtnVal->subtractOp( rhs );
775 afterOpMinimize( rtnVal, lastInSeq );
776 break;
777 }
778 case TermType: {
779 /* Return result of the term. */
780 rtnVal = term->walk( pd );
781 break;
782 }
783 case BuiltinType: {
784 /* Duplicate the builtin. */
785 rtnVal = makeBuiltin( builtin, pd );
786 break;
787 }
788 }
789
790 return rtnVal;
791 }
792
makeNameTree(ParseData * pd)793 void Expression::makeNameTree( ParseData *pd )
794 {
795 switch ( type ) {
796 case OrType:
797 case IntersectType:
798 case SubtractType:
799 case StrongSubtractType:
800 expression->makeNameTree( pd );
801 term->makeNameTree( pd );
802 break;
803 case TermType:
804 term->makeNameTree( pd );
805 break;
806 case BuiltinType:
807 break;
808 }
809 }
810
resolveNameRefs(ParseData * pd)811 void Expression::resolveNameRefs( ParseData *pd )
812 {
813 switch ( type ) {
814 case OrType:
815 case IntersectType:
816 case SubtractType:
817 case StrongSubtractType:
818 expression->resolveNameRefs( pd );
819 term->resolveNameRefs( pd );
820 break;
821 case TermType:
822 term->resolveNameRefs( pd );
823 break;
824 case BuiltinType:
825 break;
826 }
827 }
828
829 /* Clean up after a term node. */
~Term()830 Term::~Term()
831 {
832 switch ( type ) {
833 case ConcatType:
834 case RightStartType:
835 case RightFinishType:
836 case LeftType:
837 delete term;
838 delete factorWithAug;
839 break;
840 case FactorWithAugType:
841 delete factorWithAug;
842 break;
843 }
844 }
845
846 /* Evaluate a term node. */
walk(ParseData * pd,bool lastInSeq)847 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
848 {
849 FsmAp *rtnVal = 0;
850 switch ( type ) {
851 case ConcatType: {
852 /* Evaluate the Term. */
853 rtnVal = term->walk( pd, false );
854 /* Evaluate the FactorWithRep. */
855 FsmAp *rhs = factorWithAug->walk( pd );
856 /* Perform concatenation. */
857 rtnVal->concatOp( rhs );
858 afterOpMinimize( rtnVal, lastInSeq );
859 break;
860 }
861 case RightStartType: {
862 /* Evaluate the Term. */
863 rtnVal = term->walk( pd );
864
865 /* Evaluate the FactorWithRep. */
866 FsmAp *rhs = factorWithAug->walk( pd );
867
868 /* Set up the priority descriptors. The left machine gets the
869 * lower priority where as the right get the higher start priority. */
870 priorDescs[0].key = pd->nextPriorKey++;
871 priorDescs[0].priority = 0;
872 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
873
874 /* The start transitions of the right machine gets the higher
875 * priority. Use the same unique key. */
876 priorDescs[1].key = priorDescs[0].key;
877 priorDescs[1].priority = 1;
878 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
879
880 /* Perform concatenation. */
881 rtnVal->concatOp( rhs );
882 afterOpMinimize( rtnVal, lastInSeq );
883 break;
884 }
885 case RightFinishType: {
886 /* Evaluate the Term. */
887 rtnVal = term->walk( pd );
888
889 /* Evaluate the FactorWithRep. */
890 FsmAp *rhs = factorWithAug->walk( pd );
891
892 /* Set up the priority descriptors. The left machine gets the
893 * lower priority where as the finishing transitions to the right
894 * get the higher priority. */
895 priorDescs[0].key = pd->nextPriorKey++;
896 priorDescs[0].priority = 0;
897 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
898
899 /* The finishing transitions of the right machine get the higher
900 * priority. Use the same unique key. */
901 priorDescs[1].key = priorDescs[0].key;
902 priorDescs[1].priority = 1;
903 rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
904
905 /* If the right machine's start state is final we need to guard
906 * against the left machine persisting by moving through the empty
907 * string. */
908 if ( rhs->startState->isFinState() ) {
909 rhs->startState->outPriorTable.setPrior(
910 pd->curPriorOrd++, &priorDescs[1] );
911 }
912
913 /* Perform concatenation. */
914 rtnVal->concatOp( rhs );
915 afterOpMinimize( rtnVal, lastInSeq );
916 break;
917 }
918 case LeftType: {
919 /* Evaluate the Term. */
920 rtnVal = term->walk( pd );
921
922 /* Evaluate the FactorWithRep. */
923 FsmAp *rhs = factorWithAug->walk( pd );
924
925 /* Set up the priority descriptors. The left machine gets the
926 * higher priority. */
927 priorDescs[0].key = pd->nextPriorKey++;
928 priorDescs[0].priority = 1;
929 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
930
931 /* The right machine gets the lower priority. We cannot use
932 * allTransPrior here in case the start state of the right machine
933 * is final. It would allow the right machine thread to run along
934 * with the left if just passing through the start state. Using
935 * startFsmPrior prevents this. */
936 priorDescs[1].key = priorDescs[0].key;
937 priorDescs[1].priority = 0;
938 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
939
940 /* Perform concatenation. */
941 rtnVal->concatOp( rhs );
942 afterOpMinimize( rtnVal, lastInSeq );
943 break;
944 }
945 case FactorWithAugType: {
946 rtnVal = factorWithAug->walk( pd );
947 break;
948 }
949 }
950 return rtnVal;
951 }
952
makeNameTree(ParseData * pd)953 void Term::makeNameTree( ParseData *pd )
954 {
955 switch ( type ) {
956 case ConcatType:
957 case RightStartType:
958 case RightFinishType:
959 case LeftType:
960 term->makeNameTree( pd );
961 factorWithAug->makeNameTree( pd );
962 break;
963 case FactorWithAugType:
964 factorWithAug->makeNameTree( pd );
965 break;
966 }
967 }
968
resolveNameRefs(ParseData * pd)969 void Term::resolveNameRefs( ParseData *pd )
970 {
971 switch ( type ) {
972 case ConcatType:
973 case RightStartType:
974 case RightFinishType:
975 case LeftType:
976 term->resolveNameRefs( pd );
977 factorWithAug->resolveNameRefs( pd );
978 break;
979 case FactorWithAugType:
980 factorWithAug->resolveNameRefs( pd );
981 break;
982 }
983 }
984
985 /* Clean up after a factor with augmentation node. */
~FactorWithAug()986 FactorWithAug::~FactorWithAug()
987 {
988 delete factorWithRep;
989
990 /* Walk the vector of parser actions, deleting function names. */
991
992 /* Clean up priority descriptors. */
993 if ( priorDescs != 0 )
994 delete[] priorDescs;
995 }
996
assignActions(ParseData * pd,FsmAp * graph,int * actionOrd)997 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
998 {
999 /* Assign actions. */
1000 for ( int i = 0; i < actions.length(); i++ ) {
1001 switch ( actions[i].type ) {
1002 /* Transition actions. */
1003 case at_start:
1004 graph->startFsmAction( actionOrd[i], actions[i].action );
1005 afterOpMinimize( graph );
1006 break;
1007 case at_all:
1008 graph->allTransAction( actionOrd[i], actions[i].action );
1009 break;
1010 case at_finish:
1011 graph->finishFsmAction( actionOrd[i], actions[i].action );
1012 break;
1013 case at_leave:
1014 graph->leaveFsmAction( actionOrd[i], actions[i].action );
1015 break;
1016
1017 /* Global error actions. */
1018 case at_start_gbl_error:
1019 graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
1020 afterOpMinimize( graph );
1021 break;
1022 case at_all_gbl_error:
1023 graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
1024 break;
1025 case at_final_gbl_error:
1026 graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
1027 break;
1028 case at_not_start_gbl_error:
1029 graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
1030 break;
1031 case at_not_final_gbl_error:
1032 graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1033 break;
1034 case at_middle_gbl_error:
1035 graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1036 break;
1037
1038 /* Local error actions. */
1039 case at_start_local_error:
1040 graph->startErrorAction( actionOrd[i], actions[i].action,
1041 actions[i].localErrKey );
1042 afterOpMinimize( graph );
1043 break;
1044 case at_all_local_error:
1045 graph->allErrorAction( actionOrd[i], actions[i].action,
1046 actions[i].localErrKey );
1047 break;
1048 case at_final_local_error:
1049 graph->finalErrorAction( actionOrd[i], actions[i].action,
1050 actions[i].localErrKey );
1051 break;
1052 case at_not_start_local_error:
1053 graph->notStartErrorAction( actionOrd[i], actions[i].action,
1054 actions[i].localErrKey );
1055 break;
1056 case at_not_final_local_error:
1057 graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1058 actions[i].localErrKey );
1059 break;
1060 case at_middle_local_error:
1061 graph->middleErrorAction( actionOrd[i], actions[i].action,
1062 actions[i].localErrKey );
1063 break;
1064
1065 /* EOF actions. */
1066 case at_start_eof:
1067 graph->startEOFAction( actionOrd[i], actions[i].action );
1068 afterOpMinimize( graph );
1069 break;
1070 case at_all_eof:
1071 graph->allEOFAction( actionOrd[i], actions[i].action );
1072 break;
1073 case at_final_eof:
1074 graph->finalEOFAction( actionOrd[i], actions[i].action );
1075 break;
1076 case at_not_start_eof:
1077 graph->notStartEOFAction( actionOrd[i], actions[i].action );
1078 break;
1079 case at_not_final_eof:
1080 graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1081 break;
1082 case at_middle_eof:
1083 graph->middleEOFAction( actionOrd[i], actions[i].action );
1084 break;
1085
1086 /* To State Actions. */
1087 case at_start_to_state:
1088 graph->startToStateAction( actionOrd[i], actions[i].action );
1089 afterOpMinimize( graph );
1090 break;
1091 case at_all_to_state:
1092 graph->allToStateAction( actionOrd[i], actions[i].action );
1093 break;
1094 case at_final_to_state:
1095 graph->finalToStateAction( actionOrd[i], actions[i].action );
1096 break;
1097 case at_not_start_to_state:
1098 graph->notStartToStateAction( actionOrd[i], actions[i].action );
1099 break;
1100 case at_not_final_to_state:
1101 graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1102 break;
1103 case at_middle_to_state:
1104 graph->middleToStateAction( actionOrd[i], actions[i].action );
1105 break;
1106
1107 /* From State Actions. */
1108 case at_start_from_state:
1109 graph->startFromStateAction( actionOrd[i], actions[i].action );
1110 afterOpMinimize( graph );
1111 break;
1112 case at_all_from_state:
1113 graph->allFromStateAction( actionOrd[i], actions[i].action );
1114 break;
1115 case at_final_from_state:
1116 graph->finalFromStateAction( actionOrd[i], actions[i].action );
1117 break;
1118 case at_not_start_from_state:
1119 graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1120 break;
1121 case at_not_final_from_state:
1122 graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1123 break;
1124 case at_middle_from_state:
1125 graph->middleFromStateAction( actionOrd[i], actions[i].action );
1126 break;
1127
1128 /* Remaining cases, prevented by the parser. */
1129 default:
1130 assert( false );
1131 break;
1132 }
1133 }
1134 }
1135
assignPriorities(FsmAp * graph,int * priorOrd)1136 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1137 {
1138 /* Assign priorities. */
1139 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1140 switch ( priorityAugs[i].type ) {
1141 case at_start:
1142 graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1143 /* Start fsm priorities are a special case that may require
1144 * minimization afterwards. */
1145 afterOpMinimize( graph );
1146 break;
1147 case at_all:
1148 graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1149 break;
1150 case at_finish:
1151 graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1152 break;
1153 case at_leave:
1154 graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1155 break;
1156
1157 default:
1158 /* Parser Prevents this case. */
1159 break;
1160 }
1161 }
1162 }
1163
assignConditions(FsmAp * graph)1164 void FactorWithAug::assignConditions( FsmAp *graph )
1165 {
1166 for ( int i = 0; i < conditions.length(); i++ ) {
1167 switch ( conditions[i].type ) {
1168 /* Transition actions. */
1169 case at_start:
1170 graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1171 afterOpMinimize( graph );
1172 break;
1173 case at_all:
1174 graph->allTransCondition( conditions[i].action, conditions[i].sense );
1175 break;
1176 case at_leave:
1177 graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1178 break;
1179 default:
1180 break;
1181 }
1182 }
1183 }
1184
1185
1186 /* Evaluate a factor with augmentation node. */
walk(ParseData * pd)1187 FsmAp *FactorWithAug::walk( ParseData *pd )
1188 {
1189 /* Enter into the scopes created for the labels. */
1190 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1191
1192 /* Make the array of function orderings. */
1193 int *actionOrd = 0;
1194 if ( actions.length() > 0 )
1195 actionOrd = new int[actions.length()];
1196
1197 /* First walk the list of actions, assigning order to all starting
1198 * actions. */
1199 for ( int i = 0; i < actions.length(); i++ ) {
1200 if ( actions[i].type == at_start ||
1201 actions[i].type == at_start_gbl_error ||
1202 actions[i].type == at_start_local_error ||
1203 actions[i].type == at_start_to_state ||
1204 actions[i].type == at_start_from_state ||
1205 actions[i].type == at_start_eof )
1206 actionOrd[i] = pd->curActionOrd++;
1207 }
1208
1209 /* Evaluate the factor with repetition. */
1210 FsmAp *rtnVal = factorWithRep->walk( pd );
1211
1212 /* Compute the remaining action orderings. */
1213 for ( int i = 0; i < actions.length(); i++ ) {
1214 if ( actions[i].type != at_start &&
1215 actions[i].type != at_start_gbl_error &&
1216 actions[i].type != at_start_local_error &&
1217 actions[i].type != at_start_to_state &&
1218 actions[i].type != at_start_from_state &&
1219 actions[i].type != at_start_eof )
1220 actionOrd[i] = pd->curActionOrd++;
1221 }
1222
1223 /* Embed conditions. */
1224 assignConditions( rtnVal );
1225
1226 /* Embed actions. */
1227 assignActions( pd, rtnVal , actionOrd );
1228
1229 /* Make the array of priority orderings. Orderings are local to this walk
1230 * of the factor with augmentation. */
1231 int *priorOrd = 0;
1232 if ( priorityAugs.length() > 0 )
1233 priorOrd = new int[priorityAugs.length()];
1234
1235 /* Walk all priorities, assigning the priority ordering. */
1236 for ( int i = 0; i < priorityAugs.length(); i++ )
1237 priorOrd[i] = pd->curPriorOrd++;
1238
1239 /* If the priority descriptors have not been made, make them now. Make
1240 * priority descriptors for each priority asignment that will be passed to
1241 * the fsm. Used to keep track of the key, value and used bit. */
1242 if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1243 priorDescs = new PriorDesc[priorityAugs.length()];
1244 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1245 /* Init the prior descriptor for the priority setting. */
1246 priorDescs[i].key = priorityAugs[i].priorKey;
1247 priorDescs[i].priority = priorityAugs[i].priorValue;
1248 }
1249 }
1250
1251 /* Assign priorities into the machine. */
1252 assignPriorities( rtnVal, priorOrd );
1253
1254 /* Assign epsilon transitions. */
1255 for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1256 /* Get the name, which may not exist. If it doesn't then silently
1257 * ignore it because an error has already been reported. */
1258 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1259 if ( epTarg != 0 ) {
1260 /* Make the epsilon transitions. */
1261 rtnVal->epsilonTrans( epTarg->id );
1262
1263 /* Note that we have made a link to the name. */
1264 pd->localNameScope->referencedNames.append( epTarg );
1265 }
1266 }
1267
1268 /* Set entry points for labels. */
1269 if ( labels.length() > 0 ) {
1270 /* Pop the names. */
1271 pd->resetNameScope( nameFrame );
1272
1273 /* Make labels that are referenced into entry points. */
1274 for ( int i = 0; i < labels.length(); i++ ) {
1275 pd->enterNameScope( false, 1 );
1276
1277 /* Will always be found. */
1278 NameInst *name = pd->curNameInst;
1279
1280 /* If the name is referenced then set the entry point. */
1281 if ( name->numRefs > 0 )
1282 rtnVal->setEntry( name->id, rtnVal->startState );
1283 }
1284
1285 pd->popNameScope( nameFrame );
1286 }
1287
1288 if ( priorOrd != 0 )
1289 delete[] priorOrd;
1290 if ( actionOrd != 0 )
1291 delete[] actionOrd;
1292 return rtnVal;
1293 }
1294
makeNameTree(ParseData * pd)1295 void FactorWithAug::makeNameTree( ParseData *pd )
1296 {
1297 /* Add the labels to the tree of instantiated names. Each label
1298 * makes a new scope. */
1299 NameInst *prevNameInst = pd->curNameInst;
1300 for ( int i = 0; i < labels.length(); i++ )
1301 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1302
1303 /* Recurse, then pop the names. */
1304 factorWithRep->makeNameTree( pd );
1305 pd->curNameInst = prevNameInst;
1306 }
1307
1308
resolveNameRefs(ParseData * pd)1309 void FactorWithAug::resolveNameRefs( ParseData *pd )
1310 {
1311 /* Enter into the name scope created by any labels. */
1312 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1313
1314 /* Note action references. */
1315 for ( int i = 0; i < actions.length(); i++ )
1316 actions[i].action->actionRefs.append( pd->localNameScope );
1317
1318 /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1319 * the tree is constructed. */
1320 factorWithRep->resolveNameRefs( pd );
1321
1322 /* Resolve epsilon transitions. */
1323 for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1324 /* Get the link. */
1325 EpsilonLink &link = epsilonLinks[ep];
1326 NameInst *resolvedName = 0;
1327
1328 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1329 /* Epsilon drawn to an implicit final state. An implicit final is
1330 * only available in join operations. */
1331 resolvedName = pd->localNameScope->final;
1332 }
1333 else {
1334 /* Do an search for the name. */
1335 NameSet resolved;
1336 pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1337 if ( resolved.length() > 0 ) {
1338 /* Take the first one. */
1339 resolvedName = resolved[0];
1340 if ( resolved.length() > 1 ) {
1341 /* Complain about the multiple references. */
1342 error(link.loc) << "state reference " << link.target <<
1343 " resolves to multiple entry points" << endl;
1344 errorStateLabels( resolved );
1345 }
1346 }
1347 }
1348
1349 /* This is tricky, we stuff resolved epsilon transitions into one long
1350 * vector in the parse data structure. Since the name resolution and
1351 * graph generation both do identical walks of the parse tree we
1352 * should always find the link resolutions in the right place. */
1353 pd->epsilonResolvedLinks.append( resolvedName );
1354
1355 if ( resolvedName != 0 ) {
1356 /* Found the name, bump of the reference count on it. */
1357 resolvedName->numRefs += 1;
1358 }
1359 else {
1360 /* Complain, no recovery action, the epsilon op will ignore any
1361 * epsilon transitions whose names did not resolve. */
1362 error(link.loc) << "could not resolve label " << link.target << endl;
1363 }
1364 }
1365
1366 if ( labels.length() > 0 )
1367 pd->popNameScope( nameFrame );
1368 }
1369
1370
1371 /* Clean up after a factor with repetition node. */
~FactorWithRep()1372 FactorWithRep::~FactorWithRep()
1373 {
1374 switch ( type ) {
1375 case StarType: case StarStarType: case OptionalType: case PlusType:
1376 case ExactType: case MaxType: case MinType: case RangeType:
1377 delete factorWithRep;
1378 break;
1379 case FactorWithNegType:
1380 delete factorWithNeg;
1381 break;
1382 }
1383 }
1384
1385 /* Evaluate a factor with repetition node. */
walk(ParseData * pd)1386 FsmAp *FactorWithRep::walk( ParseData *pd )
1387 {
1388 FsmAp *retFsm = 0;
1389
1390 switch ( type ) {
1391 case StarType: {
1392 /* Evaluate the FactorWithRep. */
1393 retFsm = factorWithRep->walk( pd );
1394 if ( retFsm->startState->isFinState() ) {
1395 warning(loc) << "applying kleene star to a machine that "
1396 "accepts zero length word" << endl;
1397 retFsm->unsetFinState( retFsm->startState );
1398 }
1399
1400 /* Shift over the start action orders then do the kleene star. */
1401 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1402 retFsm->starOp( );
1403 afterOpMinimize( retFsm );
1404 break;
1405 }
1406 case StarStarType: {
1407 /* Evaluate the FactorWithRep. */
1408 retFsm = factorWithRep->walk( pd );
1409 if ( retFsm->startState->isFinState() ) {
1410 warning(loc) << "applying kleene star to a machine that "
1411 "accepts zero length word" << endl;
1412 }
1413
1414 /* Set up the prior descs. All gets priority one, whereas leaving gets
1415 * priority zero. Make a unique key so that these priorities don't
1416 * interfere with any priorities set by the user. */
1417 priorDescs[0].key = pd->nextPriorKey++;
1418 priorDescs[0].priority = 1;
1419 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1420
1421 /* Leaveing gets priority 0. Use same unique key. */
1422 priorDescs[1].key = priorDescs[0].key;
1423 priorDescs[1].priority = 0;
1424 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1425
1426 /* Shift over the start action orders then do the kleene star. */
1427 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1428 retFsm->starOp( );
1429 afterOpMinimize( retFsm );
1430 break;
1431 }
1432 case OptionalType: {
1433 /* Make the null fsm. */
1434 FsmAp *nu = new FsmAp();
1435 nu->lambdaFsm( );
1436
1437 /* Evaluate the FactorWithRep. */
1438 retFsm = factorWithRep->walk( pd );
1439
1440 /* Perform the question operator. */
1441 retFsm->unionOp( nu );
1442 afterOpMinimize( retFsm );
1443 break;
1444 }
1445 case PlusType: {
1446 /* Evaluate the FactorWithRep. */
1447 retFsm = factorWithRep->walk( pd );
1448 if ( retFsm->startState->isFinState() ) {
1449 warning(loc) << "applying plus operator to a machine that "
1450 "accepts zero length word" << endl;
1451 }
1452
1453 /* Need a duplicated for the star end. */
1454 FsmAp *dup = new FsmAp( *retFsm );
1455
1456 /* The start func orders need to be shifted before doing the star. */
1457 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1458
1459 /* Star the duplicate. */
1460 dup->starOp( );
1461 afterOpMinimize( dup );
1462
1463 retFsm->concatOp( dup );
1464 afterOpMinimize( retFsm );
1465 break;
1466 }
1467 case ExactType: {
1468 /* Get an int from the repetition amount. */
1469 if ( lowerRep == 0 ) {
1470 /* No copies. Don't need to evaluate the factorWithRep.
1471 * This Defeats the purpose so give a warning. */
1472 warning(loc) << "exactly zero repetitions results "
1473 "in the null machine" << endl;
1474
1475 retFsm = new FsmAp();
1476 retFsm->lambdaFsm();
1477 }
1478 else {
1479 /* Evaluate the first FactorWithRep. */
1480 retFsm = factorWithRep->walk( pd );
1481 if ( retFsm->startState->isFinState() ) {
1482 warning(loc) << "applying repetition to a machine that "
1483 "accepts zero length word" << endl;
1484 }
1485
1486 /* The start func orders need to be shifted before doing the
1487 * repetition. */
1488 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1489
1490 /* Do the repetition on the machine. Already guarded against n == 0 */
1491 retFsm->repeatOp( lowerRep );
1492 afterOpMinimize( retFsm );
1493 }
1494 break;
1495 }
1496 case MaxType: {
1497 /* Get an int from the repetition amount. */
1498 if ( upperRep == 0 ) {
1499 /* No copies. Don't need to evaluate the factorWithRep.
1500 * This Defeats the purpose so give a warning. */
1501 warning(loc) << "max zero repetitions results "
1502 "in the null machine" << endl;
1503
1504 retFsm = new FsmAp();
1505 retFsm->lambdaFsm();
1506 }
1507 else {
1508 /* Evaluate the first FactorWithRep. */
1509 retFsm = factorWithRep->walk( pd );
1510 if ( retFsm->startState->isFinState() ) {
1511 warning(loc) << "applying max repetition to a machine that "
1512 "accepts zero length word" << endl;
1513 }
1514
1515 /* The start func orders need to be shifted before doing the
1516 * repetition. */
1517 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1518
1519 /* Do the repetition on the machine. Already guarded against n == 0 */
1520 retFsm->optionalRepeatOp( upperRep );
1521 afterOpMinimize( retFsm );
1522 }
1523 break;
1524 }
1525 case MinType: {
1526 /* Evaluate the repeated machine. */
1527 retFsm = factorWithRep->walk( pd );
1528 if ( retFsm->startState->isFinState() ) {
1529 warning(loc) << "applying min repetition to a machine that "
1530 "accepts zero length word" << endl;
1531 }
1532
1533 /* The start func orders need to be shifted before doing the repetition
1534 * and the kleene star. */
1535 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1536
1537 if ( lowerRep == 0 ) {
1538 /* Acts just like a star op on the machine to return. */
1539 retFsm->starOp( );
1540 afterOpMinimize( retFsm );
1541 }
1542 else {
1543 /* Take a duplicate for the plus. */
1544 FsmAp *dup = new FsmAp( *retFsm );
1545
1546 /* Do repetition on the first half. */
1547 retFsm->repeatOp( lowerRep );
1548 afterOpMinimize( retFsm );
1549
1550 /* Star the duplicate. */
1551 dup->starOp( );
1552 afterOpMinimize( dup );
1553
1554 /* Tak on the kleene star. */
1555 retFsm->concatOp( dup );
1556 afterOpMinimize( retFsm );
1557 }
1558 break;
1559 }
1560 case RangeType: {
1561 /* Check for bogus range. */
1562 if ( upperRep - lowerRep < 0 ) {
1563 error(loc) << "invalid range repetition" << endl;
1564
1565 /* Return null machine as recovery. */
1566 retFsm = new FsmAp();
1567 retFsm->lambdaFsm();
1568 }
1569 else if ( lowerRep == 0 && upperRep == 0 ) {
1570 /* No copies. Don't need to evaluate the factorWithRep. This
1571 * defeats the purpose so give a warning. */
1572 warning(loc) << "zero to zero repetitions results "
1573 "in the null machine" << endl;
1574
1575 retFsm = new FsmAp();
1576 retFsm->lambdaFsm();
1577 }
1578 else {
1579 /* Now need to evaluate the repeated machine. */
1580 retFsm = factorWithRep->walk( pd );
1581 if ( retFsm->startState->isFinState() ) {
1582 warning(loc) << "applying range repetition to a machine that "
1583 "accepts zero length word" << endl;
1584 }
1585
1586 /* The start func orders need to be shifted before doing both kinds
1587 * of repetition. */
1588 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1589
1590 if ( lowerRep == 0 ) {
1591 /* Just doing max repetition. Already guarded against n == 0. */
1592 retFsm->optionalRepeatOp( upperRep );
1593 afterOpMinimize( retFsm );
1594 }
1595 else if ( lowerRep == upperRep ) {
1596 /* Just doing exact repetition. Already guarded against n == 0. */
1597 retFsm->repeatOp( lowerRep );
1598 afterOpMinimize( retFsm );
1599 }
1600 else {
1601 /* This is the case that 0 < lowerRep < upperRep. Take a
1602 * duplicate for the optional repeat. */
1603 FsmAp *dup = new FsmAp( *retFsm );
1604
1605 /* Do repetition on the first half. */
1606 retFsm->repeatOp( lowerRep );
1607 afterOpMinimize( retFsm );
1608
1609 /* Do optional repetition on the second half. */
1610 dup->optionalRepeatOp( upperRep - lowerRep );
1611 afterOpMinimize( dup );
1612
1613 /* Tak on the duplicate machine. */
1614 retFsm->concatOp( dup );
1615 afterOpMinimize( retFsm );
1616 }
1617 }
1618 break;
1619 }
1620 case FactorWithNegType: {
1621 /* Evaluate the Factor. Pass it up. */
1622 retFsm = factorWithNeg->walk( pd );
1623 break;
1624 }}
1625 return retFsm;
1626 }
1627
makeNameTree(ParseData * pd)1628 void FactorWithRep::makeNameTree( ParseData *pd )
1629 {
1630 switch ( type ) {
1631 case StarType:
1632 case StarStarType:
1633 case OptionalType:
1634 case PlusType:
1635 case ExactType:
1636 case MaxType:
1637 case MinType:
1638 case RangeType:
1639 factorWithRep->makeNameTree( pd );
1640 break;
1641 case FactorWithNegType:
1642 factorWithNeg->makeNameTree( pd );
1643 break;
1644 }
1645 }
1646
resolveNameRefs(ParseData * pd)1647 void FactorWithRep::resolveNameRefs( ParseData *pd )
1648 {
1649 switch ( type ) {
1650 case StarType:
1651 case StarStarType:
1652 case OptionalType:
1653 case PlusType:
1654 case ExactType:
1655 case MaxType:
1656 case MinType:
1657 case RangeType:
1658 factorWithRep->resolveNameRefs( pd );
1659 break;
1660 case FactorWithNegType:
1661 factorWithNeg->resolveNameRefs( pd );
1662 break;
1663 }
1664 }
1665
1666 /* Clean up after a factor with negation node. */
~FactorWithNeg()1667 FactorWithNeg::~FactorWithNeg()
1668 {
1669 switch ( type ) {
1670 case NegateType:
1671 case CharNegateType:
1672 delete factorWithNeg;
1673 break;
1674 case FactorType:
1675 delete factor;
1676 break;
1677 }
1678 }
1679
1680 /* Evaluate a factor with negation node. */
walk(ParseData * pd)1681 FsmAp *FactorWithNeg::walk( ParseData *pd )
1682 {
1683 FsmAp *retFsm = 0;
1684
1685 switch ( type ) {
1686 case NegateType: {
1687 /* Evaluate the factorWithNeg. */
1688 FsmAp *toNegate = factorWithNeg->walk( pd );
1689
1690 /* Negation is subtract from dot-star. */
1691 retFsm = dotStarFsm( pd );
1692 retFsm->subtractOp( toNegate );
1693 afterOpMinimize( retFsm );
1694 break;
1695 }
1696 case CharNegateType: {
1697 /* Evaluate the factorWithNeg. */
1698 FsmAp *toNegate = factorWithNeg->walk( pd );
1699
1700 /* CharNegation is subtract from dot. */
1701 retFsm = dotFsm( pd );
1702 retFsm->subtractOp( toNegate );
1703 afterOpMinimize( retFsm );
1704 break;
1705 }
1706 case FactorType: {
1707 /* Evaluate the Factor. Pass it up. */
1708 retFsm = factor->walk( pd );
1709 break;
1710 }}
1711 return retFsm;
1712 }
1713
makeNameTree(ParseData * pd)1714 void FactorWithNeg::makeNameTree( ParseData *pd )
1715 {
1716 switch ( type ) {
1717 case NegateType:
1718 case CharNegateType:
1719 factorWithNeg->makeNameTree( pd );
1720 break;
1721 case FactorType:
1722 factor->makeNameTree( pd );
1723 break;
1724 }
1725 }
1726
resolveNameRefs(ParseData * pd)1727 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1728 {
1729 switch ( type ) {
1730 case NegateType:
1731 case CharNegateType:
1732 factorWithNeg->resolveNameRefs( pd );
1733 break;
1734 case FactorType:
1735 factor->resolveNameRefs( pd );
1736 break;
1737 }
1738 }
1739
1740 /* Clean up after a factor node. */
~Factor()1741 Factor::~Factor()
1742 {
1743 switch ( type ) {
1744 case LiteralType:
1745 delete literal;
1746 break;
1747 case RangeType:
1748 delete range;
1749 break;
1750 case OrExprType:
1751 delete reItem;
1752 break;
1753 case RegExprType:
1754 delete regExpr;
1755 break;
1756 case ReferenceType:
1757 break;
1758 case ParenType:
1759 delete join;
1760 break;
1761 case LongestMatchType:
1762 delete longestMatch;
1763 break;
1764 }
1765 }
1766
1767 /* Evaluate a factor node. */
walk(ParseData * pd)1768 FsmAp *Factor::walk( ParseData *pd )
1769 {
1770 FsmAp *rtnVal = 0;
1771 switch ( type ) {
1772 case LiteralType:
1773 rtnVal = literal->walk( pd );
1774 break;
1775 case RangeType:
1776 rtnVal = range->walk( pd );
1777 break;
1778 case OrExprType:
1779 rtnVal = reItem->walk( pd, 0 );
1780 break;
1781 case RegExprType:
1782 rtnVal = regExpr->walk( pd, 0 );
1783 break;
1784 case ReferenceType:
1785 rtnVal = varDef->walk( pd );
1786 break;
1787 case ParenType:
1788 rtnVal = join->walk( pd );
1789 break;
1790 case LongestMatchType:
1791 rtnVal = longestMatch->walk( pd );
1792 break;
1793 }
1794
1795 return rtnVal;
1796 }
1797
makeNameTree(ParseData * pd)1798 void Factor::makeNameTree( ParseData *pd )
1799 {
1800 switch ( type ) {
1801 case LiteralType:
1802 case RangeType:
1803 case OrExprType:
1804 case RegExprType:
1805 break;
1806 case ReferenceType:
1807 varDef->makeNameTree( loc, pd );
1808 break;
1809 case ParenType:
1810 join->makeNameTree( pd );
1811 break;
1812 case LongestMatchType:
1813 longestMatch->makeNameTree( pd );
1814 break;
1815 }
1816 }
1817
resolveNameRefs(ParseData * pd)1818 void Factor::resolveNameRefs( ParseData *pd )
1819 {
1820 switch ( type ) {
1821 case LiteralType:
1822 case RangeType:
1823 case OrExprType:
1824 case RegExprType:
1825 break;
1826 case ReferenceType:
1827 varDef->resolveNameRefs( pd );
1828 break;
1829 case ParenType:
1830 join->resolveNameRefs( pd );
1831 break;
1832 case LongestMatchType:
1833 longestMatch->resolveNameRefs( pd );
1834 break;
1835 }
1836 }
1837
1838 /* Clean up a range object. Must delete the two literals. */
~Range()1839 Range::~Range()
1840 {
1841 delete lowerLit;
1842 delete upperLit;
1843 }
1844
1845 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
walk(ParseData * pd)1846 FsmAp *Range::walk( ParseData *pd )
1847 {
1848 /* Construct and verify the suitability of the lower end of the range. */
1849 FsmAp *lowerFsm = lowerLit->walk( pd );
1850 if ( !lowerFsm->checkSingleCharMachine() ) {
1851 error(lowerLit->token.loc) <<
1852 "bad range lower end, must be a single character" << endl;
1853 }
1854
1855 /* Construct and verify the upper end. */
1856 FsmAp *upperFsm = upperLit->walk( pd );
1857 if ( !upperFsm->checkSingleCharMachine() ) {
1858 error(upperLit->token.loc) <<
1859 "bad range upper end, must be a single character" << endl;
1860 }
1861
1862 /* Grab the keys from the machines, then delete them. */
1863 Key lowKey = lowerFsm->startState->outList.head->lowKey;
1864 Key highKey = upperFsm->startState->outList.head->lowKey;
1865 delete lowerFsm;
1866 delete upperFsm;
1867
1868 /* Validate the range. */
1869 if ( lowKey > highKey ) {
1870 /* Recover by setting upper to lower; */
1871 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1872 highKey = lowKey;
1873 }
1874
1875 /* Return the range now that it is validated. */
1876 FsmAp *retFsm = new FsmAp();
1877 retFsm->rangeFsm( lowKey, highKey );
1878 return retFsm;
1879 }
1880
1881 /* Evaluate a literal object. */
walk(ParseData * pd)1882 FsmAp *Literal::walk( ParseData *pd )
1883 {
1884 /* FsmAp to return, is the alphabet signed. */
1885 FsmAp *rtnVal = 0;
1886
1887 switch ( type ) {
1888 case Number: {
1889 /* Make the fsm key in int format. */
1890 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1891 /* Make the new machine. */
1892 rtnVal = new FsmAp();
1893 rtnVal->concatFsm( fsmKey );
1894 break;
1895 }
1896 case LitString: {
1897 /* Make the array of keys in int format. */
1898 long length;
1899 bool caseInsensitive;
1900 char *data = prepareLitString( token.loc, token.data, token.length,
1901 length, caseInsensitive );
1902 Key *arr = new Key[length];
1903 makeFsmKeyArray( arr, data, length, pd );
1904
1905 /* Make the new machine. */
1906 rtnVal = new FsmAp();
1907 if ( caseInsensitive )
1908 rtnVal->concatFsmCI( arr, length );
1909 else
1910 rtnVal->concatFsm( arr, length );
1911 delete[] data;
1912 delete[] arr;
1913 break;
1914 }}
1915 return rtnVal;
1916 }
1917
1918 /* Clean up after a regular expression object. */
~RegExpr()1919 RegExpr::~RegExpr()
1920 {
1921 switch ( type ) {
1922 case RecurseItem:
1923 delete regExpr;
1924 delete item;
1925 break;
1926 case Empty:
1927 break;
1928 }
1929 }
1930
1931 /* Evaluate a regular expression object. */
walk(ParseData * pd,RegExpr * rootRegex)1932 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1933 {
1934 /* This is the root regex, pass down a pointer to this. */
1935 if ( rootRegex == 0 )
1936 rootRegex = this;
1937
1938 FsmAp *rtnVal = 0;
1939 switch ( type ) {
1940 case RecurseItem: {
1941 /* Walk both items. */
1942 rtnVal = regExpr->walk( pd, rootRegex );
1943 FsmAp *fsm2 = item->walk( pd, rootRegex );
1944 rtnVal->concatOp( fsm2 );
1945 break;
1946 }
1947 case Empty: {
1948 rtnVal = new FsmAp();
1949 rtnVal->lambdaFsm();
1950 break;
1951 }
1952 }
1953 return rtnVal;
1954 }
1955
1956 /* Clean up after an item in a regular expression. */
~ReItem()1957 ReItem::~ReItem()
1958 {
1959 switch ( type ) {
1960 case Data:
1961 case Dot:
1962 break;
1963 case OrBlock:
1964 case NegOrBlock:
1965 delete orBlock;
1966 break;
1967 }
1968 }
1969
1970 /* Evaluate a regular expression object. */
walk(ParseData * pd,RegExpr * rootRegex)1971 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1972 {
1973 /* The fsm to return, is the alphabet signed? */
1974 FsmAp *rtnVal = 0;
1975
1976 switch ( type ) {
1977 case Data: {
1978 /* Move the data into an integer array and make a concat fsm. */
1979 Key *arr = new Key[token.length];
1980 makeFsmKeyArray( arr, token.data, token.length, pd );
1981
1982 /* Make the concat fsm. */
1983 rtnVal = new FsmAp();
1984 if ( rootRegex != 0 && rootRegex->caseInsensitive )
1985 rtnVal->concatFsmCI( arr, token.length );
1986 else
1987 rtnVal->concatFsm( arr, token.length );
1988 delete[] arr;
1989 break;
1990 }
1991 case Dot: {
1992 /* Make the dot fsm. */
1993 rtnVal = dotFsm( pd );
1994 break;
1995 }
1996 case OrBlock: {
1997 /* Get the or block and minmize it. */
1998 rtnVal = orBlock->walk( pd, rootRegex );
1999 if ( rtnVal == 0 ) {
2000 rtnVal = new FsmAp();
2001 rtnVal->lambdaFsm();
2002 }
2003 rtnVal->minimizePartition2();
2004 break;
2005 }
2006 case NegOrBlock: {
2007 /* Get the or block and minimize it. */
2008 FsmAp *fsm = orBlock->walk( pd, rootRegex );
2009 fsm->minimizePartition2();
2010
2011 /* Make a dot fsm and subtract from it. */
2012 rtnVal = dotFsm( pd );
2013 rtnVal->subtractOp( fsm );
2014 rtnVal->minimizePartition2();
2015 break;
2016 }
2017 }
2018
2019 /* If the item is followed by a star, then apply the star op. */
2020 if ( star ) {
2021 if ( rtnVal->startState->isFinState() ) {
2022 warning(loc) << "applying kleene star to a machine that "
2023 "accepts zero length word" << endl;
2024 }
2025
2026 rtnVal->starOp();
2027 rtnVal->minimizePartition2();
2028 }
2029 return rtnVal;
2030 }
2031
2032 /* Clean up after an or block of a regular expression. */
~ReOrBlock()2033 ReOrBlock::~ReOrBlock()
2034 {
2035 switch ( type ) {
2036 case RecurseItem:
2037 delete orBlock;
2038 delete item;
2039 break;
2040 case Empty:
2041 break;
2042 }
2043 }
2044
2045
2046 /* Evaluate an or block of a regular expression. */
walk(ParseData * pd,RegExpr * rootRegex)2047 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2048 {
2049 FsmAp *rtnVal = 0;
2050 switch ( type ) {
2051 case RecurseItem: {
2052 /* Evaluate the two fsm. */
2053 FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2054 FsmAp *fsm2 = item->walk( pd, rootRegex );
2055 if ( fsm1 == 0 )
2056 rtnVal = fsm2;
2057 else {
2058 fsm1->unionOp( fsm2 );
2059 rtnVal = fsm1;
2060 }
2061 break;
2062 }
2063 case Empty: {
2064 rtnVal = 0;
2065 break;
2066 }
2067 }
2068 return rtnVal;;
2069 }
2070
2071 /* Evaluate an or block item of a regular expression. */
walk(ParseData * pd,RegExpr * rootRegex)2072 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2073 {
2074 /* The return value, is the alphabet signed? */
2075 FsmAp *rtnVal = 0;
2076 switch ( type ) {
2077 case Data: {
2078 /* Make the or machine. */
2079 rtnVal = new FsmAp();
2080
2081 /* Put the or data into an array of ints. Note that we find unique
2082 * keys. Duplicates are silently ignored. The alternative would be to
2083 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2084 * 'a' don't bother here. */
2085 KeySet keySet;
2086 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2087 rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2088
2089 /* Run the or operator. */
2090 rtnVal->orFsm( keySet.data, keySet.length() );
2091 break;
2092 }
2093 case Range: {
2094 /* Make the upper and lower keys. */
2095 Key lowKey = makeFsmKeyChar( lower, pd );
2096 Key highKey = makeFsmKeyChar( upper, pd );
2097
2098 /* Validate the range. */
2099 if ( lowKey > highKey ) {
2100 /* Recover by setting upper to lower; */
2101 error(loc) << "lower end of range is greater then upper end" << endl;
2102 highKey = lowKey;
2103 }
2104
2105 /* Make the range machine. */
2106 rtnVal = new FsmAp();
2107 rtnVal->rangeFsm( lowKey, highKey );
2108
2109 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2110 if ( lowKey <= 'Z' && 'A' <= highKey ) {
2111 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2112 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2113
2114 otherLow = 'a' + ( otherLow - 'A' );
2115 otherHigh = 'a' + ( otherHigh - 'A' );
2116
2117 FsmAp *otherRange = new FsmAp();
2118 otherRange->rangeFsm( otherLow, otherHigh );
2119 rtnVal->unionOp( otherRange );
2120 rtnVal->minimizePartition2();
2121 }
2122 else if ( lowKey <= 'z' && 'a' <= highKey ) {
2123 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2124 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2125
2126 otherLow = 'A' + ( otherLow - 'a' );
2127 otherHigh = 'A' + ( otherHigh - 'a' );
2128
2129 FsmAp *otherRange = new FsmAp();
2130 otherRange->rangeFsm( otherLow, otherHigh );
2131 rtnVal->unionOp( otherRange );
2132 rtnVal->minimizePartition2();
2133 }
2134 }
2135
2136 break;
2137 }}
2138 return rtnVal;
2139 }
2140